# Import packages
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
#install.packages("xts")
#install.packages("forecast", dependencies = TRUE)
library(forecast)
#install.packages("caTools")
library(caTools)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ################################### WARNING ###################################
## # We noticed you have dplyr installed. The dplyr lag() function breaks how    #
## # base R's lag() function is supposed to work, which breaks lag(my_xts).      #
## #                                                                             #
## # If you call library(dplyr) later in this session, then calls to lag(my_xts) #
## # that you enter or source() into this session won't work correctly.          #
## #                                                                             #
## # All package code is unaffected because it is protected by the R namespace   #
## # mechanism.                                                                  #
## #                                                                             #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## # You can use stats::lag() to make sure you're not using dplyr::lag(), or you #
## # can add conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop   #
## # dplyr from breaking base R's lag() function.                                #
## ################################### WARNING ###################################
#Reading the Data
data <- read.csv('Final_Dataframe2.csv')
# Function to split data into train and set
train_test_split <- function(data) {
  train <- window(data, start = 2013, end = c(2021, 12))
  test <- window(data, start = 2022, end = c(2022, 12))
  #train <- ts(data,start = 2013,end =c(2021,12),frequency = 12)
  #test <- ts(data,start = 2022,end = c(2022,12) , frequency = 12)
  result <- list("train" = train,"test" = test)
  return(result)
}
# ARIMA Fitting
arima_fit <- function(train,test){
  arima_model <- auto.arima(train)
  summary(arima_model)
  fc_arima <- forecast(arima_model,12)
  plot(fc_arima)
  acc_arima <- accuracy(fc_arima,test)
  acc_arima
  return(list("Model" = arima_model, "Forecast" = fc_arima))
}
# Simple exponential smoothing
exp_smooth_fitting <- function(train,test){
  ses_model <- ses(train)
  summary(ses_model)
  fc_ses <- forecast(ses_model,12)
  plot(fc_ses)
  acc_ets <- accuracy(fc_ses,test)
  acc_ets
  return(list("Model" = ses_model, "Forecast" = fc_ses))
}
# Double exponential smoothing
db_smooth_fitting <- function(train,test){
  db_model <- hw(train,seasonal = "additive")
  summary(db_model)
  fc_db <- forecast(db_model,h = length(test))
  plot(fc_db)
  acc_ets <- accuracy(fc_db,test[1:12])
  acc_ets
  return(list("Model" = db_model, "Forecast" = fc_db))
}
# ETS Function
ets_fitting <- function(train,test) {
  ets_model <- ets(train)
  summary(ets_model)
  fc_ets <- forecast(ets_model,12)
  plot(fc_ets)
  acc_ets <- accuracy(fc_ets,test[1:12])
  acc_ets
  return(list("Model" = ets_model, "Forecast" = fc_ets))
  }
# Check adf and kpss test for stationary
adf_kpss_test <- function(ts_data,city_name) {
  components <- decompose(ts_data)
  plot(components)
  title(sub = city_name)
  print(adf.test(ts_data))
  print(kpss.test(ts_data))
}
# Forecast for 2023 and compare with 2022
forecast_2023 <- function(data,org,name){
  summary(data)
  forecast_values <- forecast(data,h=12)
  y_2022 <- window(org, start = c(2022, 1), end = c(2022, 12))
  combined_ts <- cbind(y_2022,forecast_values$mean)
  df <- as.data.frame(combined_ts)
  st<-stack(df)
  st<-na.omit(st)
  comp <- ts(st$values,start=c(2022,1), end=c(2023,12),frequency = 12)
  ts_95_percentile <- quantile(comp, probs = 0.95)
  print(comp)
  plot(comp,main=name)
  abline(h = ts_95_percentile, col = "red",lty=2)
  abline(v = '2023.0', col = "blue",lty=2)
}

1. Bakersfield, CA

# 1. Bakersfield, CA
baker <- data[data$CBSA_NAME == 'Bakersfield, CA',c("year","PM25_Level_FINAL")]
baker <- ts(baker$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

adf_kpss_test(baker,"Bakersfield, CA")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -5.0924, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.14389, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(baker, lag=50, main="ACF of Bakersfield, CA")
pacf(baker, lag=50, main="PACF of Bakersfield, CA")
acf(baker, lag.max = 12, main="SACF of Bakersfield, CA")
pacf(baker, lag.max = 12, main="SPACF of Bakersfield, CA")

# Split the data
splited <- train_test_split(baker)
baker_train <- splited$train
baker_test <- splited$test
baker_train
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2013 20.140717 15.773202 10.859236 10.259330 13.163587 13.264960 13.286845
## 2014 29.076029 11.206718  8.401697  8.626556  8.380872 13.972008 10.272080
## 2015 26.372614 16.700171  8.586787  7.917385  6.981461  8.415963  8.731262
## 2016  9.405342 13.265538  6.162435  6.964940  6.535630  8.768463  9.877070
## 2017  9.398602  6.336369  9.404904  6.424881  7.165264  9.091389 11.612526
## 2018 18.985599 12.464243  5.599525  7.515954  7.959923  9.344902 13.603409
## 2019 12.332949  4.678605  5.557176  5.727843  6.488621  8.143120  9.535600
## 2020  9.985740  9.292946  4.513959  6.261905  6.385131  8.151817 11.618848
## 2021 14.251243 10.456505  6.224932  8.004313  7.639459  8.908323  9.246121
##            Aug       Sep       Oct       Nov       Dec
## 2013 14.314272 11.693967 11.283602 12.886810 28.463729
## 2014  9.755219  9.942987 10.347369 23.218082 12.548488
## 2015 12.254759 10.268269  9.371513 14.268704 14.120248
## 2016 13.929826 11.228524  8.824592 17.495606 16.810945
## 2017 11.374773  9.373505 13.882949 12.374757 24.506640
## 2018 18.834403  9.559698  7.911756 15.426171 11.706371
## 2019  8.645330  8.149579  9.573955 13.656426  6.688798
## 2020 25.019042 31.700052 22.624985 15.365417 14.393610
## 2021 19.957088 17.985341 13.590883 18.221664 13.803149
baker_test
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 15.762213 11.284405  6.505184  7.281095  6.958241  6.999548  8.661705
##            Aug       Sep       Oct       Nov       Dec
## 2022  9.582796 10.456444  9.813333 14.756897 13.237197
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(baker_train,baker_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.7914 
## 
##   Initial states:
##     l = 19.0144 
## 
##   sigma:  5.178
## 
##      AIC     AICc      BIC 
## 864.8442 865.0749 872.8906 
## 
## Error measures:
##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.05208498 5.129791 3.609621 -9.411116 31.07976 0.9220053
##                    ACF1
## Training set 0.04320404
## 
## Forecasts:
##          Point Forecast       Lo 80    Hi 80       Lo 95    Hi 95
## Jan 2022       14.56274  7.92692123 21.19856   4.4141298 24.71135
## Feb 2022       14.56274  6.10034855 23.02514   1.6206280 27.50486
## Mar 2022       14.56274  4.60331990 24.52216  -0.6688798 29.79436
## Apr 2022       14.56274  3.30360908 25.82188  -2.6566160 31.78210
## May 2022       14.56274  2.13913302 26.98635  -4.4375284 33.56301
## Jun 2022       14.56274  1.07481978 28.05066  -6.0652551 35.19074
## Jul 2022       14.56274  0.08855699 29.03693  -7.5736138 36.69910
## Aug 2022       14.56274 -0.83466095 29.96015  -8.9855538 38.11104
## Sep 2022       14.56274 -1.70557086 30.83106 -10.3174956 39.44298
## Oct 2022       14.56274 -2.53216921 31.65765 -11.5816687 40.70715
ses_final <- data.frame(Actuals = baker_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##      Actuals Forecast
## 1  15.762213 14.56274
## 2  11.284405 14.56274
## 3   6.505184 14.56274
## 4   7.281095 14.56274
## 5   6.958241 14.56274
## 6   6.999548 14.56274
## 7   8.661705 14.56274
## 8   9.582796 14.56274
## 9  10.456444 14.56274
## 10  9.813333 14.56274
# Double exponential smoothing
fc_db <- db_smooth_fitting(baker_train,baker_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 0.3687 
##     beta  = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 15.8813 
##     b = -0.1241 
##     s = 4.5374 4.0817 0.4048 1.4413 3.1322 -1.2052
##            -2.2925 -4.168 -4.5716 -5.0396 -0.9372 4.6166
## 
##   sigma:  4.5056
## 
##      AIC     AICc      BIC 
## 847.5029 854.3029 893.0992 
## 
## Error measures:
##                     ME     RMSE     MAE       MPE    MAPE      MASE      ACF1
## Training set 0.2393533 4.158489 2.91631 -2.834404 24.3377 0.7449128 0.1949724
## 
## Forecasts:
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Jan 2022      16.581649 10.8074789 22.35582  7.7508179 25.41248
## Feb 2022      10.906412  4.7520789 17.06075  1.4941717 20.31865
## Mar 2022       6.682782  0.1702471 13.19532 -3.2772809 16.64285
## Apr 2022       7.029052  0.1768308 13.88127 -3.4505162 17.50862
## May 2022       7.311181  0.1351588 14.48720 -3.6635977 18.28596
## Jun 2022       9.065267  1.5792688 16.55126 -2.3835790 20.51411
## Jul 2022      10.031112  2.2473105 17.81491 -1.8731849 21.93541
## Aug 2022      14.246831  6.1760511 22.31761  1.9036384 26.59002
## Sep 2022      12.434658  4.0866082 20.78271 -0.3325824 25.20190
## Oct 2022      11.276426  2.6598775 19.89298 -1.9014479 24.45430
## Nov 2022      14.832222  5.9551489 23.70929  1.2559104 28.40853
## Dec 2022      15.166118  6.0358133 24.29642  1.2025221 29.12971
## Jan 2023      15.124300  5.7473270 24.50127  0.7834574 29.46514
## Feb 2023       9.449063 -0.1682503 19.06638 -5.2593484 24.15747
## Mar 2023       5.225433 -4.6264873 15.07735 -9.8417791 20.29265
## Apr 2023       5.571703 -4.5094920 15.65290 -9.8461545 20.98956
## May 2023       5.853832 -4.4516618 16.15932 -9.9070604 21.61472
## Jun 2023       7.607917 -2.9172152 18.13305 -8.4888838 23.70472
## Jul 2023       8.573763 -2.1666367 19.31416 -7.8522606 24.99979
## Aug 2023      12.789482  1.8379307 23.74103 -3.9594702 29.53843
## Sep 2023      10.977309 -0.1815126 22.13613 -6.0886359 28.04325
## Oct 2023       9.819077 -1.5433459 21.18150 -7.5582495 27.19640
## Nov 2023      13.374873  1.8123226 24.93742 -4.3085216 31.05827
## Dec 2023      13.708769  1.9493893 25.46815 -4.2756501 31.69319
db_final <- data.frame(Actuals = baker_test, Forecast = fc_db$Forecast$mean)
db_final
##      Actuals  Forecast
## 1  15.762213 16.581649
## 2  11.284405 10.906412
## 3   6.505184  6.682782
## 4   7.281095  7.029052
## 5   6.958241  7.311181
## 6   6.999548  9.065267
## 7   8.661705 10.031112
## 8   9.582796 14.246831
## 9  10.456444 12.434658
## 10  9.813333 11.276426
## 11 14.756897 14.832222
## 12 13.237197 15.166118
# ETS
fc_ets <- ets_fitting(baker_train,baker_test)

summary(fc_ets$Model)
## ETS(M,N,M) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.4651 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 14.2122 
##     s = 1.415 1.3205 0.9094 1.0665 1.2845 0.8867
##            0.8111 0.6422 0.6502 0.6541 0.9634 1.3963
## 
##   sigma:  0.3146
## 
##      AIC     AICc      BIC 
## 792.6042 797.8216 832.8362 
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE    MAPE      MASE      ACF1
## Training set -0.1217069 4.324555 2.886808 -6.715368 23.2308 0.7373772 0.1738792
ets_final <- data.frame(Actuals = baker_test, Forecast = fc_ets$Forecast$mean)
ets_final
##      Actuals  Forecast
## 1  15.762213 17.074019
## 2  11.284405 11.781209
## 3   6.505184  7.998143
## 4   7.281095  7.951199
## 5   6.958241  7.853922
## 6   6.999548  9.919221
## 7   8.661705 10.843482
## 8   9.582796 15.707525
## 9  10.456444 13.042466
## 10  9.813333 11.121462
## 11 14.756897 16.147737
## 12 13.237197 17.302189
# ARIMA fitting
fc_arima <- arima_fit(baker_train,baker_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(1,0,0)(1,0,0)[12] with non-zero mean 
## 
## Coefficients:
##          ar1    sar1     mean
##       0.4680  0.2719  12.0840
## s.e.  0.0885  0.0984   1.0445
## 
## sigma^2 = 19.96:  log likelihood = -313.98
## AIC=635.96   AICc=636.35   BIC=646.69
## 
## Training set error measures:
##                       ME     RMSE     MAE       MPE     MAPE      MASE
## Training set -0.07702984 4.405575 3.09575 -12.27469 27.46468 0.7907472
##                    ACF1
## Training set 0.01283631
arima_final <- data.frame(Actuals = baker_test, Forecast = fc_arima$Forecast$mean)
arima_final
##      Actuals Forecast
## 1  15.762213 13.18392
## 2  11.284405 11.88055
## 3   6.505184 10.60296
## 4   7.281095 11.02722
## 5   6.958241 10.90018
## 6   6.999548 11.23212
## 7   8.661705 11.31786
## 8   9.582796 14.22701
## 9  10.456444 13.68961
## 10  9.813333 12.49426
## 11 14.756897 13.75294
## 12 13.237197 12.55154
forecasts <- Arima(baker, order=c(1,0,0), seasonal = list(order = c(1,0,0), period = 12)) 
forecast_2023(forecasts,baker,"Bakersfield, CA")
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 15.762213 11.284405  6.505184  7.281095  6.958241  6.999548  8.661705
## 2023 13.331169 11.872483 10.442398 10.608335 10.495002 10.495114 10.951127
##            Aug       Sep       Oct       Nov       Dec
## 2022  9.582796 10.456444  9.813333 14.756897 13.237197
## 2023 11.204237 11.445533 11.266392 12.638552 12.216511

2. Fresno, CA

# 2. Fresno, CA
fresno<- data[data$CBSA_NAME == 'Fresno, CA',c("year","PM25_Level_FINAL")]
fresno <- ts(fresno$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is stationary
adf_kpss_test(fresno,"Fresno, CA")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -5.7383, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.043568, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(fresno, lag=50, main="ACF of Fresno, CA")
pacf(fresno, lag=50, main="PACF of Fresno, CA")
acf(fresno, lag.max = 12, main="SACF of Fresno, CA")
pacf(fresno, lag.max = 12, main="SPACF of Fresno, CA")

# Split the data
splited <- train_test_split(fresno)
fresno_train <- splited$train
fresno_test <- splited$test
fresno_train
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2013 24.896615 16.479742  9.552815  7.581246  8.971452  8.327099 11.826925
## 2014 35.482518 12.272758  6.862975  7.413083  6.748749  8.119676 10.053747
## 2015 29.620179 16.189031  8.199306  7.107335  7.122975  8.889808  8.951372
## 2016 14.493867 15.497752  7.115618  6.565527  5.896750  6.733572  8.950342
## 2017  8.215194  6.409674  7.767211  5.885159  7.604327  7.241441  9.906303
## 2018 21.329513 14.739190  5.257752  6.337810  6.302283  7.629013 13.155818
## 2019 13.860471  5.647300  4.854903  6.038366  5.799203  7.574327  8.328113
## 2020 13.471260 12.372962  4.737061  5.187535  5.044565  6.423449  9.333250
## 2021 17.073675 10.153338  6.503066  7.404664  6.907742  7.340619  9.702515
##            Aug       Sep       Oct       Nov       Dec
## 2013  9.133486  8.234573 11.920257 20.232620 35.228047
## 2014 10.504265 11.400684 10.069319 23.764935 13.298565
## 2015 11.628934 11.725741 10.641510 16.597221 16.998522
## 2016 12.067680 10.393336  7.582665 15.717071 17.723999
## 2017 13.598233 12.382759 14.887477 14.222665 34.624437
## 2018 22.922987 10.266189  9.333086 24.882113 16.145402
## 2019  8.311188  6.709894 10.502977 15.349192  9.435438
## 2020 31.145640 45.421304 29.461272 18.865094 20.022088
## 2021 23.337670 14.858476 19.118643 18.550964 13.466018
fresno_test
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 21.735106 13.380527  7.099492  6.289888  6.193088  6.117619  8.094263
##            Aug       Sep       Oct       Nov       Dec
## 2022  9.062323  9.546349  9.624117 14.571905 13.263767
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(fresno_train,fresno_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.9321 
## 
##   Initial states:
##     l = 24.2957 
## 
##   sigma:  6.9888
## 
##      AIC     AICc      BIC 
## 929.6218 929.8525 937.6682 
## 
## Error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.1041351 6.923774 4.595902 -11.16437 35.50558 0.9499326
##                    ACF1
## Training set 0.01068499
## 
## Forecasts:
##          Point Forecast      Lo 80    Hi 80       Lo 95    Hi 95
## Jan 2022       13.81274   4.856250 22.76923   0.1149696 27.51051
## Feb 2022       13.81274   1.568835 26.05665  -4.9126982 32.53818
## Mar 2022       13.81274  -1.006427 28.63191  -8.8512218 36.47671
## Apr 2022       13.81274  -3.196147 30.82163 -12.2001086 39.82559
## May 2022       13.81274  -5.134469 32.75995 -15.1645170 42.79000
## Jun 2022       13.81274  -6.892121 34.51760 -17.8526126 45.47810
## Jul 2022       13.81274  -8.511815 36.13730 -20.3297217 47.95521
## Aug 2022       13.81274 -10.021695 37.64718 -22.6388831 50.26437
## Sep 2022       13.81274 -11.441463 39.06695 -24.8102319 52.43572
## Oct 2022       13.81274 -12.785555 40.41104 -26.8658427 54.49133
ses_final <- data.frame(Actuals = fresno_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##      Actuals Forecast
## 1  21.735106 13.81274
## 2  13.380527 13.81274
## 3   7.099492 13.81274
## 4   6.289888 13.81274
## 5   6.193088 13.81274
## 6   6.117619 13.81274
## 7   8.094263 13.81274
## 8   9.062323 13.81274
## 9   9.546349 13.81274
## 10  9.624117 13.81274
# Double exponential smoothing
fc_db <- db_smooth_fitting(fresno_train,fresno_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 0.4969 
##     beta  = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 16.3396 
##     b = -0.0578 
##     s = 7.5023 6.4124 0.8703 2.4684 2.5585 -2.8906
##            -5.2005 -6.1716 -6.1497 -5.8185 -0.4066 6.8257
## 
##   sigma:  6.1573
## 
##      AIC     AICc      BIC 
## 914.9621 921.7621 960.5583 
## 
## Error measures:
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 8.323436e-05 5.682905 3.772877 -6.289064 28.84877 0.7798206
##                   ACF1
## Training set 0.1570167
## 
## Forecasts:
##          Point Forecast       Lo 80    Hi 80      Lo 95    Hi 95
## Jan 2022      16.856734   8.9658714 24.74760   4.788701 28.92477
## Feb 2022       9.566346   0.7545281 18.37816  -3.910166 23.04286
## Mar 2022       4.096726  -5.5488306 13.74228 -10.654880 18.84833
## Apr 2022       3.707963  -6.7050864 14.12101 -12.217422 19.63335
## May 2022       3.628328  -7.4996834 14.75634 -13.390497 20.64715
## Jun 2022       4.541467  -7.2585285 16.34146 -13.505069 22.58800
## Jul 2022       6.793672  -5.6422986 19.22964 -12.225504 25.81285
## Aug 2022      12.185204  -0.8560015 25.22641  -7.759599 32.13001
## Sep 2022      12.036039  -1.5837613 25.65584  -8.793648 32.86573
## Oct 2022      10.381249  -3.7937671 24.55626 -11.297567 32.06006
## Nov 2022      15.864909   1.1554080 30.57441  -6.631331 38.36115
## Dec 2022      16.897099   1.6716608 32.12254  -6.388199 40.18240
## Jan 2023      16.162897   0.4380453 31.88775  -7.886188 40.21198
## Feb 2023       8.872508  -7.3363763 25.08139 -15.916842 33.66186
## Mar 2023       3.402888 -13.2761753 20.08195 -22.105538 28.91132
## Apr 2023       3.014125 -14.1224031 20.15065 -23.193933 29.22218
## May 2023       2.934491 -14.6477810 20.51676 -23.955273 29.82425
## Jun 2023       3.847630 -14.1695335 21.86479 -23.707244 31.40250
## Jul 2023       6.099835 -12.3421363 24.54181 -22.104726 34.30440
## Aug 2023      11.491367  -7.3660094 30.34874 -17.348502 40.33124
## Sep 2023      11.342202  -7.9217865 30.60619 -18.119526 40.80393
## Oct 2023       9.687411  -9.9749397 29.34976 -20.383560 39.75838
## Nov 2023      15.171071  -4.8818861 35.22403 -15.497281 45.83942
## Dec 2023      16.203262  -4.2329902 36.63951 -15.051288 47.45781
db_final <- data.frame(Actuals = fresno_test, Forecast = fc_db$Forecast$mean)
db_final
##      Actuals  Forecast
## 1  21.735106 16.856734
## 2  13.380527  9.566346
## 3   7.099492  4.096726
## 4   6.289888  3.707963
## 5   6.193088  3.628328
## 6   6.117619  4.541467
## 7   8.094263  6.793672
## 8   9.062323 12.185204
## 9   9.546349 12.036039
## 10  9.624117 10.381249
## 11 14.571905 15.864909
## 12 13.263767 16.897099
# 
# # ETS
fc_ets <- ets_fitting(fresno_train,fresno_test)

summary(fc_ets$Model)
## ETS(M,N,M) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.5818 
##     gamma = 2e-04 
## 
##   Initial states:
##     l = 14.22 
##     s = 1.7695 1.5139 0.9994 1.0653 1.1875 0.6651
##            0.5435 0.5017 0.5334 0.5891 1.0097 1.622
## 
##   sigma:  0.3585
## 
##      AIC     AICc      BIC 
## 823.7585 828.9759 863.9904 
## 
## Training set error measures:
##                      ME     RMSE     MAE       MPE    MAPE      MASE      ACF1
## Training set -0.3956302 6.156618 3.81407 -8.568395 27.2337 0.7883348 0.1839134
ets_final <- data.frame(Actuals = fresno_test, Forecast = fc_ets$Forecast$mean)
ets_final
##      Actuals  Forecast
## 1  21.735106 16.996676
## 2  13.380527 10.580830
## 3   7.099492  6.173641
## 4   6.289888  5.589771
## 5   6.193088  5.258433
## 6   6.117619  5.696001
## 7   8.094263  6.971148
## 8   9.062323 12.442476
## 9   9.546349 11.162188
## 10  9.624117 10.472806
## 11 14.571905 15.864534
## 12 13.263767 18.539790
# ARIMA fitting
fc_arima <- arima_fit(fresno_train,fresno_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(1,0,0)(2,0,0)[12] with non-zero mean 
## 
## Coefficients:
##          ar1    sar1    sar2     mean
##       0.4999  0.2398  0.1833  12.9737
## s.e.  0.0868  0.0950  0.1216   1.7498
## 
## sigma^2 = 35.63:  log likelihood = -345.26
## AIC=700.52   AICc=701.11   BIC=713.93
## 
## Training set error measures:
##                       ME     RMSE      MAE       MPE     MAPE    MASE
## Training set -0.08890961 5.857849 3.872491 -15.83178 31.30315 0.80041
##                    ACF1
## Training set 0.03690424
arima_final <- data.frame(Actuals = fresno_test, Forecast = fc_arima$Forecast$mean)
arima_final
##      Actuals  Forecast
## 1  21.735106 13.773430
## 2  13.380527 12.050006
## 3   7.099492  9.843897
## 4   6.289888 10.176983
## 5   6.193088 10.048776
## 6   6.117619 10.413866
## 7   8.094263 11.517819
## 8   9.062323 18.787253
## 9   9.546349 19.371318
## 10  9.624117 17.468439
## 11 14.571905 15.390608
## 12 13.263767 14.383411
forecasts <- Arima(fresno, order=c(1,0,0), seasonal = list(order = c(2,0,0), period = 12)) 
forecast_2023(forecasts,fresno,"Fresno, CA")
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 21.735106 13.380527  7.099492  6.289888  6.193088  6.117619  8.094263
## 2023 15.144987 12.339484 10.395017 10.330505 10.260461 10.301400 11.080049
##            Aug       Sep       Oct       Nov       Dec
## 2022  9.062323  9.546349  9.624117 14.571905 13.263767
## 2023 12.970177 12.065867 12.600716 13.757140 12.818686

3. Visalia-Porterville, CA

# 3. Visalia-Porterville, CA
visalia<- data[data$CBSA_NAME == 'Visalia-Porterville, CA',c("year","PM25_Level_FINAL")]
visalia <- ts(visalia$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is stationary
adf_kpss_test(visalia,"Visalia-Porterville, CA")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -5.6302, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.043519, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(visalia, lag=50, main="ACF of Visalia-Porterville, CA")
pacf(visalia, lag=50, main="PACF of Visalia-Porterville, CA")
acf(visalia, lag.max = 12, main="SACF of Visalia-Porterville, CA")
pacf(visalia, lag.max = 12, main="SPACF of Visalia-Porterville, CA")

# Split the data
splited <- train_test_split(visalia)
visalia_train <- splited$train
visalia_test <- splited$test
visalia_train
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2013 23.014839 19.178036 13.019785  9.987944 10.224194 11.378500 12.897419
## 2014 43.415000 19.703140 11.852258 12.190944  9.169086 11.651000 11.434731
## 2015 33.265914 20.307857 12.319839 10.797333  8.569086  9.459389  8.158495
## 2016 12.298065 17.454828  8.411613  9.349667  8.370215 11.225222 13.458011
## 2017 10.316505  8.755060 14.162581  8.619000 10.128871 11.176278 14.586129
## 2018 26.242742 20.718036  7.122258  9.838000  9.123065 10.203667 16.847742
## 2019 17.943548  5.484881  6.608925  9.108056  8.925000 10.537333 11.732151
## 2020 16.207473 14.906552  7.254946  9.658056  8.040860  9.579167 11.722581
## 2021 15.640591 12.851786  7.035484 10.148056  9.519892 12.091389 14.215054
##            Aug       Sep       Oct       Nov       Dec
## 2013 10.788871 10.099111 14.947043 25.088667 40.413817
## 2014 11.152688 10.944944 10.780591 28.994278 13.748065
## 2015 12.356613 15.228722 13.295269 15.261833 14.886022
## 2016 15.054355 12.598611 10.403548 25.695500 21.939194
## 2017 14.700054 13.328389 18.035753 18.359778 41.684839
## 2018 25.778065 12.641000 12.196505 27.986333 19.489247
## 2019 10.344839  9.307667 14.044785 19.776167 11.209315
## 2020 29.334140 42.604444 30.231452 22.107500 18.885753
## 2021 25.273118 37.678611 34.323118 19.725278 10.296237
visalia_test
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 15.898656 11.756548  7.973333  9.411556  7.750538  8.669444 10.187097
##            Aug       Sep       Oct       Nov       Dec
## 2022  9.745968 10.375833 10.090591 15.092778 10.791129
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(visalia_train,visalia_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.9215 
## 
##   Initial states:
##     l = 22.671 
## 
##   sigma:  7.7117
## 
##      AIC     AICc      BIC 
## 950.8843 951.1151 958.9307 
## 
## Error measures:
##                      ME    RMSE      MAE      MPE     MAPE     MASE        ACF1
## Training set -0.1159897 7.64001 5.301716 -10.5515 34.07932 1.004812 0.008471553
## 
## Forecasts:
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Jan 2022       11.12751   1.244511 21.01052  -3.987236 26.24226
## Feb 2022       11.12751  -2.311767 24.56679  -9.426092 31.68112
## Mar 2022       11.12751  -5.106856 27.36188 -13.700812 35.95584
## Apr 2022       11.12751  -7.486870 29.74190 -17.340729 39.59576
## May 2022       11.12751  -9.595318 31.85035 -20.565323 42.82035
## Jun 2022       11.12751 -11.508216 33.76324 -23.490848 45.74588
## Jul 2022       11.12751 -13.271601 35.52663 -26.187712 48.44274
## Aug 2022       11.12751 -14.915859 37.17089 -28.702389 50.95742
## Sep 2022       11.12751 -16.462299 38.71733 -31.067464 53.32249
## Oct 2022       11.12751 -17.926544 40.18157 -33.306834 55.56186
ses_final <- data.frame(Actuals = visalia_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##      Actuals Forecast
## 1  15.898656 11.12751
## 2  11.756548 11.12751
## 3   7.973333 11.12751
## 4   9.411556 11.12751
## 5   7.750538 11.12751
## 6   8.669444 11.12751
## 7  10.187097 11.12751
## 8   9.745968 11.12751
## 9  10.375833 11.12751
## 10 10.090591 11.12751
# Double exponential smoothing
fc_db <- db_smooth_fitting(visalia_train,visalia_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 0.1116 
##     beta  = 1e-04 
##     gamma = 5e-04 
## 
##   Initial states:
##     l = 18.8222 
##     b = 0.0068 
##     s = 7.6196 8.101 1.3682 1.9019 0.7099 -2.4282
##            -4.7477 -7.0242 -5.7879 -6.0743 0.2591 6.1025
## 
##   sigma:  7.4385
## 
##      AIC     AICc      BIC 
##  955.795  962.595 1001.391 
## 
## Error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.1151178 6.865464 4.854562 -11.84502 32.01625 0.9200646
##                   ACF1
## Training set 0.4042467
## 
## Forecasts:
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Jan 2022       23.94670 14.4138213 33.47957  9.3674213 38.52597
## Feb 2022       18.10554  8.5133794 27.69771  3.4355939 32.77550
## Mar 2022       11.78047  2.1292704 21.43166 -2.9797640 26.54070
## Apr 2022       12.07193  2.3619570 21.78190 -2.7781922 26.92205
## May 2022       10.84272  1.0742190 20.61122 -4.0969134 25.78235
## Jun 2022       13.12199  3.2952039 22.94878 -1.9067825 28.15076
## Jul 2022       15.44551  5.5606780 25.33034  0.3279644 30.56305
## Aug 2022       18.59485  8.6522121 28.53749  3.3888960 33.80081
## Sep 2022       19.79137  9.7911473 29.79159  4.4973511 35.08538
## Oct 2022       19.26222  9.2046520 29.31979  3.8804958 34.64395
## Nov 2022       25.99268 15.8779798 36.10738 10.5235819 41.46177
## Dec 2022       25.51454 15.3429323 35.68615  9.9584088 41.07067
## Jan 2023       24.01269 13.7839137 34.24147  8.3691253 39.65626
## Feb 2023       18.17154  7.8862818 28.45680  2.4415954 33.90148
## Mar 2023       11.84646  1.5049331 22.18799 -3.9695410 27.66246
## Apr 2023       12.13792  1.7403316 22.53552 -3.7638217 28.03967
## May 2023       10.90872  0.4552583 21.36217 -5.0784675 26.89590
## Jun 2023       13.18798  2.6788621 23.69711 -2.8843315 29.26030
## Jul 2023       15.51150  4.9469102 26.07610 -0.6456478 31.66866
## Aug 2023       18.66085  8.0409748 29.28072  2.4191538 34.90254
## Sep 2023       19.85736  9.1823981 30.53232  3.5314141 36.18331
## Oct 2023       19.32822  8.5983495 30.05808  2.9183007 35.73813
## Nov 2023       26.05867 15.2740837 36.84326  9.5650670 42.55228
## Dec 2023       25.58053 14.7414031 36.41967  9.0035138 42.15755
db_final <- data.frame(Actuals = visalia_test, Forecast = fc_db$Forecast$mean)
db_final
##      Actuals Forecast
## 1  15.898656 23.94670
## 2  11.756548 18.10554
## 3   7.973333 11.78047
## 4   9.411556 12.07193
## 5   7.750538 10.84272
## 6   8.669444 13.12199
## 7  10.187097 15.44551
## 8   9.745968 18.59485
## 9  10.375833 19.79137
## 10 10.090591 19.26222
## 11 15.092778 25.99268
## 12 10.791129 25.51454
# 
# # ETS
fc_ets <- ets_fitting(visalia_train,visalia_test)

summary(fc_ets$Model)
## ETS(M,N,M) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.1913 
##     gamma = 3e-04 
## 
##   Initial states:
##     l = 16.7473 
##     s = 1.5225 1.4578 1.0393 1.238 1.1992 0.7553
##            0.6285 0.5225 0.5778 0.6326 0.963 1.4635
## 
##   sigma:  0.3769
## 
##      AIC     AICc      BIC 
## 889.3072 894.5246 929.5392 
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.2716263 6.996827 4.973363 -12.18665 32.66693 0.9425804
##                   ACF1
## Training set 0.3495207
ets_final <- data.frame(Actuals = visalia_test, Forecast = fc_ets$Forecast$mean)
ets_final
##      Actuals Forecast
## 1  15.898656 26.75936
## 2  11.756548 17.61390
## 3   7.973333 11.56852
## 4   9.411556 10.56964
## 5   7.750538  9.55793
## 6   8.669444 11.49624
## 7  10.187097 13.81464
## 8   9.745968 21.92095
## 9  10.375833 22.63204
## 10 10.090591 19.00687
## 11 15.092778 26.65941
## 12 10.791129 27.83441
# ARIMA fitting
fc_arima <- arima_fit(visalia_train,visalia_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1     mean
##       1.3702  -0.6152  -0.7410  15.4218
## s.e.  0.1137   0.0814   0.1167   0.6707
## 
## sigma^2 = 42.89:  log likelihood = -354.56
## AIC=719.11   AICc=719.7   BIC=732.52
## 
## Training set error measures:
##                       ME    RMSE      MAE      MPE     MAPE      MASE
## Training set -0.01322584 6.42683 4.515596 -12.9751 30.10386 0.8558218
##                     ACF1
## Training set -0.04018751
arima_final <- data.frame(Actuals = visalia_test, Forecast = fc_arima$Forecast$mean)
arima_final
##      Actuals  Forecast
## 1  15.898656  7.444204
## 2  11.756548  7.644214
## 3   7.973333  9.672690
## 4   9.411556 12.328991
## 5   7.750538 14.720726
## 6   8.669444 16.363757
## 7  10.187097 17.143692
## 8   9.745968 17.201615
## 9  10.375833 16.801200
## 10 10.090591 16.216937
## 11 15.092778 15.662718
## 12 10.791129 15.262761
forecasts <- Arima(visalia, order=c(2,0,1)) 
forecast_2023(forecasts,visalia,"Visalia-Porterville, CA")
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 15.898656 11.756548  7.973333  9.411556  7.750538  8.669444 10.187097
## 2023 14.328922 16.645460 17.750155 17.895574 17.430158 16.689456 15.932849
##            Aug       Sep       Oct       Nov       Dec
## 2022  9.745968 10.375833 10.090591 15.092778 10.791129
## 2023 15.320733 14.920857 14.731033 14.707144 14.788661

4. San Francisco-Oakland-Hayward, CA

# 4. San Francisco-Oakland-Hayward, CA
sanfran<- data[data$CBSA_NAME == 'San Francisco-Oakland-Hayward, CA',c("year","PM25_Level_FINAL")]
sanfran <- ts(sanfran$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is stationary
adf_kpss_test(sanfran,"San Francisco-Oakland-Hayward, CA")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -4.8697, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.061811, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(sanfran, lag=50, main="ACF of San Francisco-Oakland-Hayward, CA")
pacf(sanfran, lag=50, main="PACF of San Francisco-Oakland-Hayward, CA")
acf(sanfran, lag.max = 12, main="SACF of San Francisco-Oakland-Hayward, CA")
pacf(sanfran, lag.max = 12, main="SPACF of San Francisco-Oakland-Hayward, CA")

# Split the data
splited <- train_test_split(sanfran)
sanfran_train <- splited$train
sanfran_test <- splited$test
sanfran_train
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2013 14.205537  9.496074  7.415541  8.070708  9.486135  7.520996 10.541469
## 2014 15.272271  6.888707  7.337745  8.106806  7.532240  9.177716  5.911751
## 2015 18.752043 10.084683  8.237760  6.729694  6.261658  7.750862  4.809265
## 2016  7.913970  8.530172  5.204355  7.146444  7.864382  7.915954  6.375233
## 2017  8.663173  6.499740  6.643483  6.804848  7.920355  8.242818  9.194325
## 2018 13.698169  8.903127  6.970046  9.009424  7.694423 10.288071  8.278319
## 2019  8.911051  4.097151  4.942222  7.391300  6.264575  7.968256  7.482152
## 2020  7.159235  8.462991  5.234871  5.324676  5.912113  6.587807  7.381379
## 2021  8.737460  5.870589  5.735856  8.839953  8.445042  5.911200  6.743109
##            Aug       Sep       Oct       Nov       Dec
## 2013  6.492399  6.653721 10.852361 13.002962 17.633854
## 2014  6.684977  9.054396  7.023636 10.297968  8.070948
## 2015  8.904004  6.739120  7.007894  7.792231  8.085157
## 2016  6.273144  9.701631  5.735797  8.096882  8.883936
## 2017 11.125396 14.077303 16.615449  8.883798 16.846491
## 2018 13.595821 10.592313  8.260573 41.133327  8.714585
## 2019  6.718850  6.118885  8.486043 10.896430  6.519245
## 2020 15.618696 30.861777 14.316191  8.483605 11.106173
## 2021 10.974883 10.075973  5.607763  9.695813  8.199860
sanfran_test
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 11.114506  8.902186  6.024010  7.144165  7.157204  6.550333  4.791828
##            Aug       Sep       Oct       Nov       Dec
## 2022  6.682401  8.680000  7.463011  8.035926 10.835161
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(sanfran_train,sanfran_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = 9.16 
## 
##   sigma:  4.7461
## 
##      AIC     AICc      BIC 
## 846.0338 846.2646 854.0802 
## 
## Error measures:
##                       ME     RMSE     MAE       MPE     MAPE      MASE
## Training set 0.000183296 4.701962 2.69014 -13.86039 28.48749 0.7357193
##                   ACF1
## Training set 0.2283456
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80      Lo 95   Hi 95
## Jan 2022       9.159993 3.077605 15.24238 -0.1422173 18.4622
## Feb 2022       9.159993 3.077605 15.24238 -0.1422173 18.4622
## Mar 2022       9.159993 3.077605 15.24238 -0.1422174 18.4622
## Apr 2022       9.159993 3.077605 15.24238 -0.1422174 18.4622
## May 2022       9.159993 3.077605 15.24238 -0.1422175 18.4622
## Jun 2022       9.159993 3.077604 15.24238 -0.1422175 18.4622
## Jul 2022       9.159993 3.077604 15.24238 -0.1422175 18.4622
## Aug 2022       9.159993 3.077604 15.24238 -0.1422176 18.4622
## Sep 2022       9.159993 3.077604 15.24238 -0.1422176 18.4622
## Oct 2022       9.159993 3.077604 15.24238 -0.1422177 18.4622
ses_final <- data.frame(Actuals = sanfran_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##      Actuals Forecast
## 1  11.114506 9.159993
## 2   8.902186 9.159993
## 3   6.024010 9.159993
## 4   7.144165 9.159993
## 5   7.157204 9.159993
## 6   6.550333 9.159993
## 7   4.791828 9.159993
## 8   6.682401 9.159993
## 9   8.680000 9.159993
## 10  7.463011 9.159993
# Double exponential smoothing
fc_db <- db_smooth_fitting(sanfran_train,sanfran_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 0.1071 
##     beta  = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 11.4529 
##     b = -0.0264 
##     s = 1.3734 4.3974 0.6746 2.6299 0.2955 -2.0863
##            -0.9975 -1.7948 -1.8331 -2.7878 -1.6708 1.7995
## 
##   sigma:  4.7068
## 
##      AIC     AICc      BIC 
## 856.9399 863.7399 902.5361 
## 
## Error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE     ACF1
## Training set -0.0368937 4.344199 2.667767 -10.24447 28.11946 0.7296007 0.129713
## 
## Forecasts:
##          Point Forecast        Lo 80    Hi 80      Lo 95    Hi 95
## Jan 2022       9.856998  3.824964053 15.88903  0.6317983 19.08220
## Feb 2022       6.359337  0.292746998 12.42593 -2.9187118 15.63739
## Mar 2022       5.215362 -0.885661835 11.31639 -4.1153489 14.54607
## Apr 2022       6.143362  0.008024176 12.27870 -3.2398273 15.52655
## May 2022       6.154704 -0.014828282 12.32424 -3.2807816 15.59019
## Jun 2022       6.924687  0.721075278 13.12830 -2.5629183 16.41229
## Jul 2022       5.809871 -0.427705462 12.04745 -3.7296788 15.34942
## Aug 2022       8.164503  1.893074648 14.43593 -1.4268189 17.75582
## Sep 2022      10.471519  4.166349407 16.77669  0.8285942 20.11444
## Oct 2022       8.488994  2.150191646 14.82780 -1.2053678 18.18336
## Nov 2022      12.185157  5.812828499 18.55749  2.4395215 21.93079
## Dec 2022       9.134696  2.728946772 15.54045 -0.6620522 18.93144
## Jan 2023       9.534214  3.095057292 15.97337 -0.3136263 19.38205
## Feb 2023       6.036553 -0.435818543 12.50893 -3.8620853 15.93519
## Mar 2023       4.892578 -1.612908626 11.39807 -5.0567055 14.84186
## Apr 2023       5.820578 -0.717925788 12.35908 -4.1792006 15.82036
## May 2023       5.831921 -0.739502813 12.40334 -4.2182043 15.88205
## Jun 2023       6.601903 -0.002344703 13.20615 -3.4984223 16.70223
## Jul 2023       5.487087 -1.149891279 12.12406 -4.6632952 15.63747
## Aug 2023       7.841719  1.172103088 14.51133 -2.3585783 18.04202
## Sep 2023      10.148735  3.446572659 16.85090 -0.1013380 20.39881
## Oct 2023       8.166211  1.431590715 14.90083 -2.1335017 18.46592
## Nov 2023      11.862373  5.095384825 18.62936  1.5131575 22.21159
## Dec 2023       8.811913  2.012642214 15.61118 -1.5866741 19.21050
db_final <- data.frame(Actuals = sanfran_test, Forecast = fc_db$Forecast$mean)
db_final
##      Actuals  Forecast
## 1  11.114506  9.856998
## 2   8.902186  6.359337
## 3   6.024010  5.215362
## 4   7.144165  6.143362
## 5   7.157204  6.154704
## 6   6.550333  6.924687
## 7   4.791828  5.809871
## 8   6.682401  8.164503
## 9   8.680000 10.471519
## 10  7.463011  8.488994
## 11  8.035926 12.185157
## 12 10.835161  9.134696
# 
# # ETS
fc_ets <- ets_fitting(sanfran_train,sanfran_test)

summary(fc_ets$Model)
## ETS(M,N,M) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.3019 
##     gamma = 9e-04 
## 
##   Initial states:
##     l = 9.5722 
##     s = 1.2001 1.6957 0.9555 1.2946 0.9451 0.6954
##            0.7917 0.7526 0.7551 0.698 0.8515 1.3646
## 
##   sigma:  0.366
## 
##      AIC     AICc      BIC 
## 775.7197 780.9371 815.9517 
## 
## Training set error measures:
##                      ME     RMSE      MAE      MPE     MAPE      MASE
## Training set -0.3126575 4.510242 2.744202 -11.1736 28.95184 0.7505045
##                      ACF1
## Training set -0.007968911
ets_final <- data.frame(Actuals = sanfran_test, Forecast = fc_ets$Forecast$mean)
ets_final
##      Actuals  Forecast
## 1  11.114506  9.991528
## 2   8.902186  6.240016
## 3   6.024010  5.113478
## 4   7.144165  5.535581
## 5   7.157204  5.516101
## 6   6.550333  5.801981
## 7   4.791828  5.097944
## 8   6.682401  6.924459
## 9   8.680000  9.473246
## 10  7.463011  6.999033
## 11  8.035926 12.402521
## 12 10.835161  8.789161
# ARIMA fitting
fc_arima <- arima_fit(sanfran_train,sanfran_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1    mean
##       0.2288  9.1710
## s.e.  0.0937  0.5694
## 
## sigma^2 = 21.34:  log likelihood = -317.52
## AIC=641.05   AICc=641.28   BIC=649.09
## 
## Training set error measures:
##                       ME     RMSE      MAE       MPE     MAPE     MASE
## Training set -0.01189837 4.576104 2.553642 -12.82878 26.80691 0.698389
##                     ACF1
## Training set -0.01027321
arima_final <- data.frame(Actuals = sanfran_test, Forecast = fc_arima$Forecast$mean)
arima_final
##      Actuals Forecast
## 1  11.114506 8.948852
## 2   8.902186 9.120199
## 3   6.024010 9.159398
## 4   7.144165 9.168365
## 5   7.157204 9.170417
## 6   6.550333 9.170886
## 7   4.791828 9.170994
## 8   6.682401 9.171018
## 9   8.680000 9.171024
## 10  7.463011 9.171025
## 11  8.035926 9.171025
## 12 10.835161 9.171025
forecasts <- hw(sanfran,seasonal = "additive") 
forecast_2023(forecasts,sanfran,"San Francisco-Oakland-Hayward, CA")
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 11.114506  8.902186  6.024010  7.144165  7.157204  6.550333  4.791828
## 2023  9.050103  6.052096  4.606744  5.596654  5.430355  6.029005  5.357698
##            Aug       Sep       Oct       Nov       Dec
## 2022  6.682401  8.680000  7.463011  8.035926 10.835161
## 2023  7.374091  9.623147  7.283380 11.237765  8.764217

5. Los Angeles-Long Beach-Anaheim, CA

# 5. Los Angeles-Long Beach-Anaheim, CA
la<- data[data$CBSA_NAME == 'Los Angeles-Long Beach-Anaheim, CA',c("year","PM25_Level_FINAL")]
la <- ts(la$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is stationary
adf_kpss_test(la,"Los Angeles-Long Beach-Anaheim, CA")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -4.7625, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.20178, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(la, lag=50, main="ACF of Los Angeles-Long Beach-Anaheim, CA")
pacf(la, lag=50, main="PACF of Los Angeles-Long Beach-Anaheim, CA")
acf(la, lag.max = 12, main="SACF of Los Angeles-Long Beach-Anaheim, CA")
pacf(la, lag.max = 12, main="SPACF of Los Angeles-Long Beach-Anaheim, CA")

# Split the data
splited <- train_test_split(la)
la_train <- splited$train
la_test <- splited$test
la_train
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2013 10.775721 11.586037 14.040365 10.645398 12.254288 11.916965 12.532003
## 2014 19.639388 13.609770 11.681271 11.725955 13.457021 18.326931 15.166092
## 2015 17.663924 18.120735 11.660419 10.621967 11.181164 13.547517 10.985849
## 2016 12.049397 10.877542 10.942865  9.582993  9.413870 13.629007 12.865365
## 2017  9.751119  8.525057 11.403447  9.846657 10.712801 12.185195 13.833005
## 2018 15.823293 11.089484  7.832485 12.110653  9.441849 12.642665 12.712067
## 2019 10.389767  6.740379  6.932178  9.044187  8.185871  9.679808 13.120254
## 2020 13.934051 11.194071  5.876351  7.791335 10.140212  8.984350 13.311061
## 2021 12.361618 10.876266  8.334549 11.525415 11.213888  9.743016 12.429164
##            Aug       Sep       Oct       Nov       Dec
## 2013 12.907057 11.021956 14.178679 11.772750 13.554888
## 2014 13.554983 12.823381 14.853963 10.684021 13.293163
## 2015 13.565537 11.769490 10.374864  8.643998 11.163394
## 2016 12.175106 11.464115  9.402369 11.243997 12.910343
## 2017 13.600511 11.663732 13.771886 11.967071 19.541431
## 2018 15.076603 12.958787 10.899603 12.814561 12.850120
## 2019 10.489368  9.797378 11.468439 14.244683 10.851441
## 2020 13.122716 23.125787 20.011531 14.695296 14.502180
## 2021 12.672363 12.820703  9.931610 18.006346 16.460511
la_test
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 12.524817  9.532310  9.562707 11.068886 11.428857 11.606964 10.449607
##            Aug       Sep       Oct       Nov       Dec
## 2022 10.282575 10.278517 11.316487  9.655000 11.042912
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(la_train,la_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.5218 
## 
##   Initial states:
##     l = 11.4362 
## 
##   sigma:  2.7759
## 
##      AIC     AICc      BIC 
## 730.1852 730.4159 738.2315 
## 
## Error measures:
##                      ME     RMSE      MAE       MPE     MAPE     MASE      ACF1
## Training set 0.07443112 2.750117 2.121602 -3.034369 17.79791 0.832403 0.1234916
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2022       15.63088 12.073371 19.18839 10.190139 21.07162
## Feb 2022       15.63088 11.618149 19.64361  9.493937 21.76783
## Mar 2022       15.63088 11.209551 20.05221  8.869040 22.39272
## Apr 2022       15.63088 10.835644 20.42612  8.297198 22.96457
## May 2022       15.63088 10.488854 20.77291  7.766829 23.49493
## Jun 2022       15.63088 10.164019 21.09774  7.270037 23.99173
## Jul 2022       15.63088  9.857431 21.40433  6.801151 24.46061
## Aug 2022       15.63088  9.566323 21.69544  6.355940 24.90582
## Sep 2022       15.63088  9.288563 21.97320  5.931142 25.33062
## Oct 2022       15.63088  9.022466 22.23930  5.524183 25.73758
ses_final <- data.frame(Actuals = la_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##      Actuals Forecast
## 1  12.524817 15.63088
## 2   9.532310 15.63088
## 3   9.562707 15.63088
## 4  11.068886 15.63088
## 5  11.428857 15.63088
## 6  11.606964 15.63088
## 7  10.449607 15.63088
## 8  10.282575 15.63088
## 9  10.278517 15.63088
## 10 11.316487 15.63088
# Double exponential smoothing
fc_db <- db_smooth_fitting(la_train,la_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 0.3183 
##     beta  = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 12.5364 
##     b = 0.0164 
##     s = 1.589 0.2728 0.6162 0.843 0.6492 0.9808
##            0.2534 -1.5601 -1.8149 -2.4329 -0.7276 1.3311
## 
##   sigma:  2.6604
## 
##      AIC     AICc      BIC 
## 733.7015 740.5015 779.2977 
## 
## Error measures:
##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.01553613 2.455408 1.819019 -3.128215 14.90093 0.7136854
##                   ACF1
## Training set 0.1607398
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2022       15.08074 11.671337 18.49013 9.866511 20.29496
## Feb 2022       13.03803  9.460000 16.61606 7.565905 18.51016
## Mar 2022       11.34921  7.610048 15.08838 5.630653 17.06777
## Apr 2022       11.98319  8.089457 15.87692 6.028240 17.93814
## May 2022       12.25433  8.211842 16.29681 6.071881 18.43677
## Jun 2022       14.08386  9.897821 18.26991 7.681864 20.48586
## Jul 2022       14.82755 10.502622 19.15247 8.213146 21.44195
## Aug 2022       14.51260 10.053036 18.97217 7.692285 21.33292
## Sep 2022       14.72234 10.131995 19.31267 7.702016 21.74265
## Oct 2022       14.51179  9.794224 19.22936 7.296894 21.72669
## Nov 2022       14.18485  9.343320 19.02639 6.780367 21.58934
## Dec 2022       15.51712 10.554642 20.47959 7.927666 23.10657
## Jan 2023       15.27554 10.194856 20.35623 7.505303 23.04578
## Feb 2023       13.23284  8.036629 18.42905 5.285922 21.17976
## Mar 2023       11.54402  6.234731 16.85331 3.424163 19.66388
## Apr 2023       12.17800  6.757918 17.59807 3.888702 20.46729
## May 2023       12.44913  6.920419 17.97785 3.993694 20.90457
## Jun 2023       14.27867  8.643347 19.91399 5.660187 22.89716
## Jul 2023       15.02235  9.282335 20.76237 6.243753 23.80095
## Aug 2023       14.70741  8.864508 20.55031 5.771463 23.64335
## Sep 2023       14.91714  8.973078 20.86121 5.826480 24.00780
## Oct 2023       14.70660  8.663003 20.75020 5.463716 23.94948
## Nov 2023       14.37966  8.238082 20.52124 4.986927 23.77239
## Dec 2023       15.71193  9.473846 21.95000 6.171606 25.25224
db_final <- data.frame(Actuals = la_test, Forecast = fc_db$Forecast$mean)
db_final
##      Actuals Forecast
## 1  12.524817 15.08074
## 2   9.532310 13.03803
## 3   9.562707 11.34921
## 4  11.068886 11.98319
## 5  11.428857 12.25433
## 6  11.606964 14.08386
## 7  10.449607 14.82755
## 8  10.282575 14.51260
## 9  10.278517 14.72234
## 10 11.316487 14.51179
## 11  9.655000 14.18485
## 12 11.042912 15.51712
# 
# # ETS
fc_ets <- ets_fitting(la_train,la_test)

summary(fc_ets$Model)
## ETS(M,N,N) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.5478 
## 
##   Initial states:
##     l = 10.7682 
## 
##   sigma:  0.2273
## 
##      AIC     AICc      BIC 
## 725.6362 725.8669 733.6826 
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.08415087 2.751333 2.127693 -2.883279 17.82615 0.8347925
##                   ACF1
## Training set 0.1056579
ets_final <- data.frame(Actuals = la_test, Forecast = fc_ets$Forecast$mean)
ets_final
##      Actuals Forecast
## 1  12.524817 15.74696
## 2   9.532310 15.74696
## 3   9.562707 15.74696
## 4  11.068886 15.74696
## 5  11.428857 15.74696
## 6  11.606964 15.74696
## 7  10.449607 15.74696
## 8  10.282575 15.74696
## 9  10.278517 15.74696
## 10 11.316487 15.74696
## 11  9.655000 15.74696
## 12 11.042912 15.74696
# ARIMA fitting
fc_arima <- arima_fit(la_train,la_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(1,0,0)(1,0,0)[12] with non-zero mean 
## 
## Coefficients:
##          ar1    sar1     mean
##       0.4525  0.1899  12.2218
## s.e.  0.0868  0.0966   0.5141
## 
## sigma^2 = 6.163:  log likelihood = -250.26
## AIC=508.52   AICc=508.91   BIC=519.25
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.01380938 2.447768 1.822124 -3.717128 15.32118 0.7149039
##                    ACF1
## Training set 0.02517927
arima_final <- data.frame(Actuals = la_test, Forecast = fc_arima$Forecast$mean)
arima_final
##      Actuals Forecast
## 1  12.524817 13.97047
## 2   9.532310 12.74553
## 3   9.562707 11.83613
## 4  11.068886 12.24909
## 5  11.428857 12.10255
## 6  11.606964 11.78365
## 7  10.449607 12.27594
## 8  10.282575 12.31403
## 9  10.278517 12.33854
## 10 11.316487 11.78816
## 11  9.655000 13.32103
## 12 11.042912 13.02710
forecasts <- Arima(la, order=c(1,0,0), seasonal = list(order = c(1,0,0), period = 12)) 
forecast_2023(forecasts,la,"Los Angeles-Long Beach-Anaheim, CA")
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 12.524817  9.532310  9.562707 11.068886 11.428857 11.606964 10.449607
## 2023 11.322692 11.257390 11.453383 11.784082 11.882126 11.929120 11.750393
##            Aug       Sep       Oct       Nov       Dec
## 2022 10.282575 10.278517 11.316487  9.655000 11.042912
## 2023 11.727198 11.728277 11.896820 11.628663 11.853134

6. Cheyenne, WY

# 6. Cheyenne, WY
cheyen<- data[data$CBSA_NAME == 'Cheyenne, WY',c("year","PM25_Level_FINAL")]
cheyen <- ts(cheyen$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is stationary
adf_kpss_test(cheyen,"Cheyenne, WY")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -5.2041, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.28013, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(cheyen, lag=50, main="ACF of Cheyenne, WY")
pacf(cheyen, lag=50, main="PACF of Cheyenne, WY")
acf(cheyen, lag.max = 12, main="SACF of Cheyenne, WY")
pacf(cheyen, lag.max = 12, main="SPACF of Cheyenne, WY")

# Split the data
splited <- train_test_split(cheyen)
cheyen_train <- splited$train
cheyen_test <- splited$test
cheyen_train
##             Jan        Feb        Mar        Apr        May        Jun
## 2013  1.6126344  1.5739137  3.3250000  2.6661111  2.8191398  5.3800000
## 2014  0.9713441  4.4119048  2.4706989  2.2715556  2.4876882  2.6820159
## 2015  4.1057527  4.4259524  4.8511425  4.8986111  3.2926882  3.9189352
## 2016  3.0029032  2.9466092  3.2833333  3.3020556  4.4889247  5.4498889
## 2017  4.9195161  3.3258333  3.7290860  2.2068889  2.0098925  3.2910556
## 2018  1.2086022  3.2544643  1.4599462  2.6991667  2.3529570  3.9180556
## 2019  2.0918280  5.1343112  3.8665361  2.3275556  2.3934409  2.5841349
## 2020  1.6069508  1.9694089  1.9318203  2.5377778  2.3854839  2.8379444
## 2021  1.1935484  3.4370370  2.5186828  2.1000000  1.7588710  4.1372222
##             Jul        Aug        Sep        Oct        Nov        Dec
## 2013  4.0668817  3.5137634  1.4604444  2.4219355  2.4233333  2.1655914
## 2014  7.6033333  5.4669892  5.5960556  4.3606989  4.1817778  3.8540860
## 2015  5.9322581  7.5864516  3.3644444  3.3578495  2.7740000  2.9902688
## 2016  5.9890860  5.6079570  4.6757222  4.6233871  4.8945000  5.2215054
## 2017  4.4626344  6.1362366 10.0644583  1.8279570  1.7577778  2.5867742
## 2018  5.4784946 10.8131720  4.6116667  1.9525134  1.7134524  2.2368280
## 2019  4.5142396  4.1097696  4.1637500  3.8837404  2.7045079  1.6720814
## 2020  4.1000000  8.9000000 11.9850000 10.7505376  2.0244444  0.8607527
## 2021  7.8982527 15.6322581  8.3877778  4.2655914  4.4244444  4.6795699
cheyen_test
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2022 3.854839 4.630952 4.787634 7.105556 5.347715 5.810556 5.461290 4.827599
##           Sep      Oct      Nov      Dec
## 2022 6.038333 3.956452 3.977500 4.362903
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(cheyen_train,cheyen_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.9455 
## 
##   Initial states:
##     l = 1.6159 
## 
##   sigma:  2.3198
## 
##      AIC     AICc      BIC 
## 691.4068 691.6375 699.4532 
## 
## Error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.02986968 2.298172 1.418023 -14.77986 41.25494 0.7521606
##                    ACF1
## Training set 0.01152116
## 
## Forecasts:
##          Point Forecast      Lo 80     Hi 80      Lo 95     Hi 95
## Jan 2022       4.665914  1.6930330  7.638795  0.1192849  9.212543
## Feb 2022       4.665914  0.5746515  8.757176 -1.5911320 10.922960
## Mar 2022       4.665914 -0.2978383  9.629666 -2.9254900 12.257318
## Apr 2022       4.665914 -1.0384041 10.370232 -4.0580876 13.389915
## May 2022       4.665914 -1.6933039 11.025132 -5.0596705 14.391498
## Jun 2022       4.665914 -2.2867876 11.618615 -5.9673255 15.299153
## Jul 2022       4.665914 -2.8334505 12.165278 -6.8033742 16.135202
## Aug 2022       4.665914 -3.3428859 12.674714 -7.5824884 16.914316
## Sep 2022       4.665914 -3.8217996 13.153627 -8.3149238 17.646752
## Oct 2022       4.665914 -4.2750977 13.606926 -9.0081834 18.340011
ses_final <- data.frame(Actuals = cheyen_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##     Actuals Forecast
## 1  3.854839 4.665914
## 2  4.630952 4.665914
## 3  4.787634 4.665914
## 4  7.105556 4.665914
## 5  5.347715 4.665914
## 6  5.810556 4.665914
## 7  5.461290 4.665914
## 8  4.827599 4.665914
## 9  6.038333 4.665914
## 10 3.956452 4.665914
# Double exponential smoothing
fc_db <- db_smooth_fitting(cheyen_train,cheyen_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 0.1084 
##     beta  = 1e-04 
##     gamma = 2e-04 
## 
##   Initial states:
##     l = 2.6026 
##     b = 0.0262 
##     s = -1.2228 -0.9852 0.4918 2.0047 3.4061 1.727
##            -0.2154 -1.2983 -1.149 -0.9178 -0.5405 -1.3005
## 
##   sigma:  1.9834
## 
##      AIC     AICc      BIC 
## 670.2716 677.0716 715.8678 
## 
## Error measures:
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.005262972 1.830586 1.294002 -17.39583 39.52667 0.6863764
##                  ACF1
## Training set 0.348038
## 
## Forecasts:
##          Point Forecast    Lo 80     Hi 80     Lo 95     Hi 95
## Jan 2022       4.083018 1.541201  6.624834 0.1956443  7.970391
## Feb 2022       4.869684 2.312946  7.426421 0.9594915  8.779876
## Mar 2022       4.518493 1.946895  7.090091 0.5855730  8.451413
## Apr 2022       4.313304 1.726903  6.899705 0.3577449  8.268863
## May 2022       4.190189 1.589042  6.791336 0.2120778  8.168300
## Jun 2022       5.299222 2.683385  7.915058 1.2986449  9.299799
## Jul 2022       7.267337 4.636866  9.897808 3.2443789 11.290295
## Aug 2022       8.973017 6.327966 11.618069 4.9277608 13.018274
## Sep 2022       7.597498 4.937920 10.257076 3.5300247 11.664972
## Oct 2022       6.110138 3.436086  8.784191 2.0205280 10.199748
## Nov 2022       4.659712 1.971236  7.348187 0.5480436  8.771380
## Dec 2022       4.448537 1.745690  7.151384 0.3148886  8.582185
## Jan 2023       4.396445 1.679228  7.113663 0.2408202  8.552070
## Feb 2023       5.183111 2.451621  7.914601 1.0056577  9.360565
## Mar 2023       4.831921 2.086206  7.577635 0.6327127  9.031129
## Apr 2023       4.626732 1.866840  7.386624 0.4058419  8.847622
## May 2023       4.503616 1.729595  7.277638 0.2611161  8.746117
## Jun 2023       5.612649 2.824543  8.400756 1.3486090  9.876690
## Jul 2023       7.580765 4.778620 10.382910 3.2952538 11.866276
## Aug 2023       9.286445 6.470306 12.102584 4.9795317 13.593359
## Sep 2023       7.910926 5.080836 10.741016 3.5826773 12.239175
## Oct 2023       6.423566 3.579569  9.267563 2.0740482 10.773084
## Nov 2023       4.973139 2.115278  7.831001 0.6024177  9.343861
## Dec 2023       4.761965 1.890281  7.633648 0.3701033  9.153826
db_final <- data.frame(Actuals = cheyen_test, Forecast = fc_db$Forecast$mean)
db_final
##     Actuals Forecast
## 1  3.854839 4.083018
## 2  4.630952 4.869684
## 3  4.787634 4.518493
## 4  7.105556 4.313304
## 5  5.347715 4.190189
## 6  5.810556 5.299222
## 7  5.461290 7.267337
## 8  4.827599 8.973017
## 9  6.038333 7.597498
## 10 3.956452 6.110138
## 11 3.977500 4.659712
## 12 4.362903 4.448537
# 
# # ETS
fc_ets <- ets_fitting(cheyen_train,cheyen_test)

summary(fc_ets$Model)
## ETS(M,N,M) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 0.5828 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 3.1298 
##     s = 0.6913 0.7705 1.0641 1.6205 1.8346 1.3512
##            0.9099 0.6565 0.7198 0.7944 1.0497 0.5375
## 
##   sigma:  0.4153
## 
##      AIC     AICc      BIC 
## 599.6235 604.8409 639.8555 
## 
## Training set error measures:
##                       ME    RMSE      MAE       MPE     MAPE      MASE
## Training set -0.01896406 1.90263 1.342826 -13.15622 38.36993 0.7122739
##                    ACF1
## Training set 0.09437624
ets_final <- data.frame(Actuals = cheyen_test, Forecast = fc_ets$Forecast$mean)
ets_final
##     Actuals  Forecast
## 1  3.854839  3.322249
## 2  4.630952  6.487170
## 3  4.787634  4.910064
## 4  7.105556  4.448661
## 5  5.347715  4.057761
## 6  5.810556  5.623504
## 7  5.461290  8.351145
## 8  4.827599 11.338363
## 9  6.038333 10.014395
## 10 3.956452  6.576614
## 11 3.977500  4.762090
## 12 4.362903  4.272904
# ARIMA fitting
fc_arima <- arima_fit(cheyen_train,cheyen_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1    mean
##       1.2487  -0.5495  -0.6419  3.9158
## s.e.  0.1925   0.1040   0.2184  0.2246
## 
## sigma^2 = 3.88:  log likelihood = -224.71
## AIC=459.43   AICc=460.02   BIC=472.84
## 
## Training set error measures:
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.008836568 1.932934 1.372384 -21.06599 42.61813 0.7279527
##                    ACF1
## Training set 0.02300424
arima_final <- data.frame(Actuals = cheyen_test, Forecast = fc_arima$Forecast$mean)
arima_final
##     Actuals Forecast
## 1  3.854839 3.407597
## 2  4.630952 2.861510
## 3  4.787634 2.878643
## 4  7.105556 3.200134
## 5  5.347715 3.592148
## 6  5.810556 3.904965
## 7  5.461290 4.080134
## 8  4.827599 4.126954
## 9  6.038333 4.089152
## 10 3.956452 4.016221
## 11 3.977500 3.945930
## 12 4.362903 3.898239
forecasts <- Arima(cheyen, order=c(2,0,1)) 
forecast_2023(forecasts,cheyen,"Cheyenne, WY")
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2022 3.854839 4.630952 4.787634 7.105556 5.347715 5.810556 5.461290 4.827599
## 2023 4.152718 4.013415 3.958493 3.963291 3.994315 4.026238 4.046859 4.054608
##           Sep      Oct      Nov      Dec
## 2022 6.038333 3.956452 3.977500 4.362903
## 2023 4.053479 4.048604 4.043763 4.040716

7. Wilmington, NC

# 7. Wilmington, NC
wilmin<- data[data$CBSA_NAME == 'Wilmington, NC',c("year","PM25_Level_FINAL")]
wilmin <- ts(wilmin$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is non- stationary
adf_kpss_test(wilmin,"Wilmington, NC")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -2.8944, Lag order = 4, p-value = 0.2051
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.8084, Truncation lag parameter = 4, p-value = 0.01
par(mfrow=c(2,2)) 
acf(wilmin, lag=50, main="ACF of Wilmington, NC")
pacf(wilmin, lag=50, main="PACF of Wilmington, NC")
acf(wilmin, lag.max = 12, main="SACF of Wilmington, NC")
pacf(wilmin, lag.max = 12, main="SPACF of Wilmington, NC")

# Differencing - stationary
wilmin_stat <- diff(wilmin)
adf_kpss_test(wilmin_stat,"Wilmington, NC")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -6.7074, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.069729, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(wilmin_stat, lag=50, main="ACF of Wilmington, NC")
pacf(wilmin_stat, lag=50, main="PACF of Wilmington, NC")
acf(wilmin_stat, lag.max = 12, main="SACF of Wilmington, NC")
pacf(wilmin_stat, lag.max = 12, main="SPACF of Wilmington, NC")

# Split the data
splited <- train_test_split(wilmin_stat)
## Warning in window.default(x, ...): 'start' value not changed
wilmin_train <- splited$train
wilmin_test <- splited$test
wilmin_train
##               Jan          Feb          Mar          Apr          May
## 2013              -3.016805876  1.437235983 -1.319408602  0.728010753
## 2014  0.577688172  1.682675691 -4.399879992 -0.061241039  0.081671147
## 2015 -0.095161290  1.479262673 -2.941628264 -0.624086022  0.961720430
## 2016 -1.104032258  1.282007786  3.894605117 -1.143225806  2.417016129
## 2017  0.008064516  2.399711982 -0.893260369 -0.818269614  5.398645958
## 2018 -2.184408602 -0.871639785  1.943145161 -0.812311828 -1.064513889
## 2019  0.735215054  0.310176651 -0.216269841 -0.193888889  0.426057348
## 2020  0.693279570 -2.277817946 -0.160622914  0.161523297 -2.754265233
## 2021 -0.972580645 -0.810829493  1.470641321  1.522930108 -1.648602151
##               Jun          Jul          Aug          Sep          Oct
## 2013  0.759072581 -1.795833333  2.265793011  3.984997219 -2.755561735
## 2014 -1.868754480 -1.155439068  3.325806452 -5.242311828  1.991908602
## 2015  0.448279570  1.502258065  0.814516129 -3.086429366 -0.279575597
## 2016 -0.855349462 -2.451505376 -1.108064516  0.529569892 -0.556845238
## 2017 -3.049139785  0.401021505 -0.903494624  0.307473118 -2.437043011
## 2018  4.630663314 -3.331256952 -0.012903226 -3.840322581  3.226736111
## 2019 -0.754946237  3.242500000 -1.840241935 -2.298924731  0.648387097
## 2020  2.842043011  0.859569892 -0.556868743  0.615632184  0.054946237
## 2021 -1.638897849 -0.298467742  2.500000000 -2.390698925 -0.110913978
##               Nov          Dec
## 2013  0.378521505 -0.598548387
## 2014  1.281980287  0.489390681
## 2015 -1.215213675 -0.684910394
## 2016  1.733511905 -1.745752688
## 2017  1.722330367 -0.615072303
## 2018 -0.256736111 -0.129677419
## 2019  2.949112903 -1.023306452
## 2020  1.150053763  1.653172043
## 2021  1.594247312  0.775107527
wilmin_test
##               Jan          Feb          Mar          Apr          May
## 2022 -0.414516129  0.640956221 -0.084504608 -1.080376344  0.388575269
##               Jun          Jul          Aug          Sep          Oct
## 2022 -0.220241935 -1.516989247  0.503225806 -0.151236559  1.715752688
##               Nov          Dec
## 2022 -0.002419355 -0.277016129
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(wilmin_train,wilmin_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = -0.0279 
## 
##   sigma:  1.9371
## 
##      AIC     AICc      BIC 
## 645.4659 645.6989 653.4844 
## 
## Error measures:
##                        ME     RMSE      MAE     MPE     MAPE      MASE
## Training set 0.0002694892 1.918885 1.490281 100.679 102.9097 0.6818697
##                    ACF1
## Training set -0.3446098
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2022    -0.02786226 -2.510322 2.454597 -3.824457 3.768732
## Feb 2022    -0.02786226 -2.510322 2.454597 -3.824457 3.768732
## Mar 2022    -0.02786226 -2.510322 2.454597 -3.824457 3.768732
## Apr 2022    -0.02786226 -2.510322 2.454598 -3.824457 3.768732
## May 2022    -0.02786226 -2.510322 2.454598 -3.824457 3.768732
## Jun 2022    -0.02786226 -2.510322 2.454598 -3.824457 3.768732
## Jul 2022    -0.02786226 -2.510322 2.454598 -3.824457 3.768732
## Aug 2022    -0.02786226 -2.510322 2.454598 -3.824457 3.768732
## Sep 2022    -0.02786226 -2.510322 2.454598 -3.824457 3.768732
## Oct 2022    -0.02786226 -2.510322 2.454598 -3.824457 3.768732
ses_final <- data.frame(Actuals = wilmin_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##        Actuals    Forecast
## 1  -0.41451613 -0.02786226
## 2   0.64095622 -0.02786226
## 3  -0.08450461 -0.02786226
## 4  -1.08037634 -0.02786226
## 5   0.38857527 -0.02786226
## 6  -0.22024194 -0.02786226
## 7  -1.51698925 -0.02786226
## 8   0.50322581 -0.02786226
## 9  -0.15123656 -0.02786226
## 10  1.71575269 -0.02786226
# Double exponential smoothing
fc_db <- db_smooth_fitting(wilmin_train,wilmin_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 5e-04 
##     beta  = 2e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = -0.3013 
##     b = 0.0043 
##     s = -0.323 -0.4129 1.2436 -0.1921 -1.0257 0.5244
##            -0.2415 0.0025 0.5257 -0.2886 -0.0117 0.1993
## 
##   sigma:  1.9989
## 
##      AIC     AICc      BIC 
## 664.8762 671.7526 710.3143 
## 
## Error measures:
##                     ME     RMSE      MAE      MPE     MAPE      MASE       ACF1
## Training set 0.0179858 1.843388 1.431023 168.8138 204.8096 0.6547564 -0.3453954
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2022   -0.117887969 -2.679563 2.443787 -4.035632 3.799856
## Feb 2022    0.408875586 -2.152800 2.970551 -3.508869 4.326620
## Mar 2022    0.202621097 -2.359055 2.764298 -3.715125 4.120367
## Apr 2022   -0.069843180 -2.631521 2.491835 -3.987591 3.847905
## May 2022    0.749109788 -1.812570 3.310789 -3.168641 4.666861
## Jun 2022    0.230599917 -2.331082 2.792282 -3.687155 4.148354
## Jul 2022   -0.008977434 -2.570662 2.552707 -3.926736 3.908782
## Aug 2022    0.761510373 -1.800178 3.323199 -3.156254 4.679275
## Sep 2022   -0.784218034 -3.345911 1.777475 -4.701989 3.133553
## Oct 2022    0.054375687 -2.507322 2.616074 -3.863403 3.972155
## Nov 2022    1.494226489 -1.067478 4.055931 -2.423562 5.412015
## Dec 2022   -0.157257127 -2.718968 2.404454 -4.075056 3.760542
## Jan 2023   -0.062952614 -2.624672 2.498767 -3.980765 3.854860
## Feb 2023    0.463810942 -2.097918 3.025540 -3.454015 4.381637
## Mar 2023    0.257556452 -2.304183 2.819296 -3.660285 4.175398
## Apr 2023   -0.014907825 -2.576658 2.546843 -3.932767 3.902951
## May 2023    0.804045143 -1.757718 3.365808 -3.113834 4.721924
## Jun 2023    0.285535273 -2.276242 2.847313 -3.632365 4.203435
## Jul 2023    0.045957921 -2.515835 2.607751 -3.871966 3.963882
## Aug 2023    0.816445729 -1.745364 3.378255 -3.101504 4.734396
## Sep 2023   -0.729282678 -3.291111 1.832546 -4.647261 3.188696
## Oct 2023    0.109311042 -2.452537 2.671159 -3.808698 4.027320
## Nov 2023    1.549161845 -1.012708 4.111032 -2.368881 5.467204
## Dec 2023   -0.102321771 -2.664216 2.459572 -4.020400 3.815757
db_final <- data.frame(Actuals = wilmin_test, Forecast = fc_db$Forecast$mean)
db_final
##         Actuals     Forecast
## 1  -0.414516129 -0.117887969
## 2   0.640956221  0.408875586
## 3  -0.084504608  0.202621097
## 4  -1.080376344 -0.069843180
## 5   0.388575269  0.749109788
## 6  -0.220241935  0.230599917
## 7  -1.516989247 -0.008977434
## 8   0.503225806  0.761510373
## 9  -0.151236559 -0.784218034
## 10  1.715752688  0.054375687
## 11 -0.002419355  1.494226489
## 12 -0.277016129 -0.157257127
# 
# # ETS
fc_ets <- ets_fitting(wilmin_train,wilmin_test)

summary(fc_ets$Model)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = -0.0279 
## 
##   sigma:  1.9371
## 
##      AIC     AICc      BIC 
## 645.4659 645.6989 653.4844 
## 
## Training set error measures:
##                        ME     RMSE     MAE      MPE     MAPE      MASE
## Training set 0.0002985529 1.918885 1.49028 100.6797 102.9146 0.6818693
##                    ACF1
## Training set -0.3446098
ets_final <- data.frame(Actuals = wilmin_test, Forecast = fc_ets$Forecast$mean)
ets_final
##         Actuals    Forecast
## 1  -0.414516129 -0.02789109
## 2   0.640956221 -0.02789109
## 3  -0.084504608 -0.02789109
## 4  -1.080376344 -0.02789109
## 5   0.388575269 -0.02789109
## 6  -0.220241935 -0.02789109
## 7  -1.516989247 -0.02789109
## 8   0.503225806 -0.02789109
## 9  -0.151236559 -0.02789109
## 10  1.715752688 -0.02789109
## 11 -0.002419355 -0.02789109
## 12 -0.277016129 -0.02789109
# ARIMA fitting
fc_arima <- arima_fit(wilmin_train,wilmin_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(1,0,1) with zero mean 
## 
## Coefficients:
##          ar1      ma1
##       0.3660  -0.8341
## s.e.  0.1454   0.0932
## 
## sigma^2 = 2.992:  log likelihood = -209.74
## AIC=425.49   AICc=425.72   BIC=433.51
## 
## Training set error measures:
##                      ME     RMSE      MAE      MPE     MAPE      MASE
## Training set -0.1137101 1.713378 1.302999 73.17057 238.2405 0.5961797
##                     ACF1
## Training set -0.02974118
arima_final <- data.frame(Actuals = wilmin_test, Forecast = fc_arima$Forecast$mean)
arima_final
##         Actuals      Forecast
## 1  -0.414516129 -6.765647e-01
## 2   0.640956221 -2.476203e-01
## 3  -0.084504608 -9.062817e-02
## 4  -1.080376344 -3.316959e-02
## 5   0.388575269 -1.213995e-02
## 6  -0.220241935 -4.443181e-03
## 7  -1.516989247 -1.626189e-03
## 8   0.503225806 -5.951793e-04
## 9  -0.151236559 -2.178335e-04
## 10  1.715752688 -7.972631e-05
## 11 -0.002419355 -2.917955e-05
## 12 -0.277016129 -1.067961e-05
forecasts <- hw(wilmin_stat,seasonal = "additive")
forecast_2023(forecasts,wilmin_stat,"Wilmington, NC")
##               Jan          Feb          Mar          Apr          May
## 2022 -0.414516129  0.640956221 -0.084504608 -1.080376344  0.388575269
## 2023 -0.151079485  0.407079157 -0.033632624 -0.108591899  0.638205494
##               Jun          Jul          Aug          Sep          Oct
## 2022 -0.220241935 -1.516989247  0.503225806 -0.151236559  1.715752688
## 2023  0.165352937 -0.005692280  0.675875721 -1.050574351  0.263344782
##               Nov          Dec
## 2022 -0.002419355 -0.277016129
## 2023  1.209518165 -0.024759456

8 Urban Honolulu, HI

# 8 Urban Honolulu, HI
honolulu<- data[data$CBSA_NAME == 'Urban Honolulu, HI',c("year","PM25_Level_FINAL")]
honolulu <- ts(honolulu$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is non-stationary
adf_kpss_test(honolulu,"Urban Honolulu, HI")
## Warning in adf.test(ts_data): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -5.98, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.65721, Truncation lag parameter = 4, p-value = 0.01744
par(mfrow=c(2,2)) 
acf(honolulu, lag=50, main="ACF of Urban Honolulu, HI")
pacf(honolulu, lag=50, main="PACF of Urban Honolulu, HI")
acf(honolulu, lag.max = 12, main="SACF of Urban Honolulu, HI")
pacf(honolulu, lag.max = 12, main="SPACF of Urban Honolulu, HI")

# Differencing - stationary
honolulu_stat <- diff(honolulu)
adf_kpss_test(honolulu_stat,"Urban Honolulu, HI")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -6.6448, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.075474, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(honolulu_stat, lag=50, main="ACF of Urban Honolulu, HI")
pacf(honolulu_stat, lag=50, main="PACF of Urban Honolulu, HI")
acf(honolulu_stat, lag.max = 12, main="SACF of Urban Honolulu, HI")
pacf(honolulu_stat, lag.max = 12, main="SPACF of Urban Honolulu, HI")

# Split the data
splited <- train_test_split(honolulu_stat)
## Warning in window.default(x, ...): 'start' value not changed
honolulu_train <- splited$train
honolulu_test <- splited$test
honolulu_train
##              Jan         Feb         Mar         Apr         May         Jun
## 2013             -1.42704493  0.60682988  0.58074552 -1.45203584 -1.00063082
## 2014  0.67731183  0.74198541 -0.53031874  1.13969176  0.02487814 -2.86204480
## 2015  1.50564516  0.69062020 -2.38282450  0.29197133 -0.31535842 -1.11269713
## 2016  1.20645161 -1.70378198  2.18926585 -2.40453405 -2.60836918  0.26114695
## 2017  1.29301075  0.98897849  0.20564516 -1.09620072 -0.67288530 -1.45239247
## 2018  0.96505376 -0.05528994 -0.60143049 -0.05351792 -1.54314875  0.66575986
## 2019  1.27752688 -1.06970622 -0.11894969  0.86419176 -0.67790143  0.40712366
## 2020  0.91478495 -0.26923990 -0.78882462 -0.07240143 -0.11399642 -0.34422581
## 2021 -0.13075269  0.60319124 -0.13996544 -0.85356631  0.19270609 -0.94087276
##              Jul         Aug         Sep         Oct         Nov         Dec
## 2013 -0.14141219 -0.47795699 -0.54179749  1.31695878  0.99559677 -1.37172581
## 2014 -0.12478315  1.04516278 -1.45699074  0.92267025  0.90677419 -0.25874552
## 2015 -0.36472222  1.54919355  1.13636201  0.43353047  1.58674731 -2.13379032
## 2016 -0.17727599 -0.71612903  0.11812724  1.02461470  0.95455197 -0.13976703
## 2017 -0.23362903 -0.28629032  1.07408602  1.35924731  0.02241935 -0.37887097
## 2018  0.85300358 -1.08473118 -1.03799462  0.33767204  0.40493907 -0.19456272
## 2019  0.02529570 -0.11989247 -0.03873656 -0.17239247  0.39555914  0.25556989
## 2020  0.17755914 -0.23263441  0.33618638  0.76241577 -0.68430466  0.45231541
## 2021  0.41667921  0.06736559 -0.15915591  0.22646774  0.12686559 -0.10482258
honolulu_test
##              Jan         Feb         Mar         Apr         May         Jun
## 2022  0.86118280 -0.46871736  0.93866359  1.02658244 -0.43884050 -1.58299283
##              Jul         Aug         Sep         Oct         Nov         Dec
## 2022  0.21793907  0.02473118 -0.77600358  0.41327957  0.75505376  1.11089606
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(honolulu_train,honolulu_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = -0.0379 
## 
##   sigma:  0.9707
## 
##      AIC     AICc      BIC 
## 497.6136 497.8466 505.6321 
## 
## Error measures:
##                       ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 0.001984595 0.961602 0.737346 100.2901 100.2901 0.7798191
##                    ACF1
## Training set -0.1319029
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## Feb 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## Mar 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## Apr 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## May 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## Jun 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## Jul 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## Aug 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## Sep 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
## Oct 2022    -0.03783528 -1.281859 1.206189 -1.940406 1.864735
ses_final <- data.frame(Actuals = honolulu_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##        Actuals    Forecast
## 1   0.86118280 -0.03783528
## 2  -0.46871736 -0.03783528
## 3   0.93866359 -0.03783528
## 4   1.02658244 -0.03783528
## 5  -0.43884050 -0.03783528
## 6  -1.58299283 -0.03783528
## 7   0.21793907 -0.03783528
## 8   0.02473118 -0.03783528
## 9  -0.77600358 -0.03783528
## 10  0.41327957 -0.03783528
# Double exponential smoothing
fc_db <- db_smooth_fitting(honolulu_train,honolulu_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
##     gamma = 3e-04 
## 
##   Initial states:
##     l = -0.0729 
##     b = 8e-04 
##     s = 0.9776 -0.5137 0.5534 0.7398 0.1405 -0.0776
##            0.041 -0.6647 -0.679 -0.2238 -0.1831 -0.1103
## 
##   sigma:  0.894
## 
##      AIC     AICc      BIC 
## 492.6904 499.5668 538.1285 
## 
## Error measures:
##                       ME      RMSE      MAE      MPE     MAPE      MASE
## Training set 0.003435491 0.8244808 0.631659 64.68019 170.8848 0.6680443
##                    ACF1
## Training set -0.2300245
## 
## Forecasts:
##          Point Forecast      Lo 80     Hi 80      Lo 95    Hi 95
## Jan 2022     0.99291036 -0.1528342 2.1386549 -0.7593548 2.745175
## Feb 2022    -0.09423420 -1.2399788 1.0515104 -1.8464994 1.658031
## Mar 2022    -0.16601447 -1.3117592 0.9797302 -1.9182798 1.586251
## Apr 2022    -0.20576636 -1.3515112 0.9399785 -1.9580319 1.546499
## May 2022    -0.66067931 -1.8064243 0.4850657 -2.4129451 1.091586
## Jun 2022    -0.64526928 -1.7910146 0.5004760 -2.3975355 1.106997
## Jul 2022     0.06135008 -1.0843956 1.2070958 -1.6909168 1.813617
## Aug 2022    -0.05635075 -1.2020970 1.0893955 -1.8086184 1.695917
## Sep 2022     0.16185424 -0.9838926 1.3076011 -1.5904143 1.914123
## Oct 2022     0.76245626 -0.3832914 1.9082039 -0.9898135 2.514726
## Nov 2022     0.57688658 -0.5688620 1.7226352 -1.1753846 2.329158
## Dec 2022    -0.48906812 -1.6348178 0.6566816 -2.2413411 1.263205
## Jan 2023     1.00277161 -0.1429800 2.1485232 -0.7495043 2.755048
## Feb 2023    -0.08437295 -1.2301261 1.0613802 -1.8366512 1.667905
## Mar 2023    -0.15615322 -1.3019081 0.9896017 -1.9084341 1.596128
## Apr 2023    -0.19590511 -1.3416620 0.9498518 -1.9481890 1.556379
## May 2023    -0.65081806 -1.7965772 0.4949410 -2.4031054 1.101469
## Jun 2023    -0.63540802 -1.7811696 0.5103536 -2.3876992 1.116883
## Jul 2023     0.07121133 -1.0745530 1.2169757 -1.6810841 1.823507
## Aug 2023    -0.04648949 -1.1922570 1.0992780 -1.7987896 1.705811
## Sep 2023     0.17171549 -0.9740554 1.3174863 -1.5805898 1.924021
## Oct 2023     0.77231751 -0.3734571 1.9180921 -0.9799935 2.524628
## Nov 2023     0.58674783 -0.5590308 1.7325265 -1.1655694 2.339065
## Dec 2023    -0.47920687 -1.6249899 0.6665762 -2.2315308 1.273117
db_final <- data.frame(Actuals = honolulu_test, Forecast = fc_db$Forecast$mean)
db_final
##        Actuals    Forecast
## 1   0.86118280  0.99291036
## 2  -0.46871736 -0.09423420
## 3   0.93866359 -0.16601447
## 4   1.02658244 -0.20576636
## 5  -0.43884050 -0.66067931
## 6  -1.58299283 -0.64526928
## 7   0.21793907  0.06135008
## 8   0.02473118 -0.05635075
## 9  -0.77600358  0.16185424
## 10  0.41327957  0.76245626
## 11  0.75505376  0.57688658
## 12  1.11089606 -0.48906812
# 
# # ETS
fc_ets <- ets_fitting(honolulu_train,honolulu_test)

summary(fc_ets$Model)
## ETS(A,N,A) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = -0.0331 
##     s = 0.9614 -0.4258 0.5473 0.7344 -0.0353 -0.0104
##            0.0762 -0.6292 -0.7109 -0.1799 -0.186 -0.1419
## 
##   sigma:  0.8813
## 
##      AIC     AICc      BIC 
## 487.9516 493.2263 528.0440 
## 
## Training set error measures:
##                       ME      RMSE       MAE      MPE     MAPE      MASE
## Training set 0.006159839 0.8216395 0.6303331 63.03012 167.2271 0.6666421
##                   ACF1
## Training set -0.230229
ets_final <- data.frame(Actuals = honolulu_test, Forecast = fc_ets$Forecast$mean)
ets_final
##        Actuals    Forecast
## 1   0.86118280  0.92840558
## 2  -0.46871736 -0.17496443
## 3   0.93866359 -0.21903216
## 4   1.02658244 -0.21291699
## 5  -0.43884050 -0.74394987
## 6  -1.58299283 -0.66233333
## 7   0.21793907  0.04319658
## 8   0.02473118 -0.04341935
## 9  -0.77600358 -0.06833704
## 10  0.41327957  0.70130852
## 11  0.75505376  0.51430584
## 12  1.11089606 -0.45881753
# ARIMA fitting
fc_arima <- arima_fit(honolulu_train,honolulu_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(1,0,1)(1,0,0)[12] with zero mean 
## 
## Coefficients:
##          ar1      ma1    sar1
##       0.6460  -0.9613  0.2635
## s.e.  0.0851   0.0291  0.0966
## 
## sigma^2 = 0.7744:  log likelihood = -137.47
## AIC=282.94   AICc=283.34   BIC=293.64
## 
## Training set error measures:
##                      ME      RMSE       MAE     MPE     MAPE      MASE
## Training set -0.1519852 0.8675952 0.6609416 82.5366 151.9951 0.6990137
##                     ACF1
## Training set -0.02015277
arima_final <- data.frame(Actuals = honolulu_test, Forecast = fc_arima$Forecast$mean)
arima_final
##        Actuals    Forecast
## 1   0.86118280  0.09887963
## 2  -0.46871736  0.24504164
## 3   0.93866359  0.01876089
## 4   1.02658244 -0.18893877
## 5  -0.43884050  0.07398593
## 6  -1.58299283 -0.23288270
## 7   0.21793907  0.11946496
## 8   0.02473118  0.02400605
## 9  -0.77600358 -0.03788820
## 10  0.41327957  0.06227600
## 11  0.75505376  0.03511058
## 12  1.11089606 -0.02652656
forecasts <- Arima(honolulu_stat, order=c(1,0,1), seasonal = list(order = c(1,0,0), period = 12)) 
forecast_2023(forecasts,honolulu_stat,"Urban Honolulu, HI")
##               Jan          Feb          Mar          Apr          May
## 2022  0.861182796 -0.468717358  0.938663594  1.026582437 -0.438840502
## 2023 -0.473000866 -0.574910327 -0.062138935  0.061328917 -0.245488295
##               Jun          Jul          Aug          Sep          Oct
## 2022 -1.582992832  0.217939068  0.024731183 -0.776003584  0.413279570
## 2023 -0.493433432 -0.006892723 -0.037588942 -0.229279357  0.081256619
##               Nov          Dec
## 2022  0.755053763  1.110896057
## 2023  0.173390622  0.267317604

9. Kahului-Wailuku-Lahaina, HI

# 9. Kahului-Wailuku-Lahaina, HI
kahului<- data[data$CBSA_NAME == 'Kahului-Wailuku-Lahaina, HI',c("year","PM25_Level_FINAL")]
kahului <- ts(kahului$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is stationary
adf_kpss_test(kahului,"Kahului-Wailuku-Lahaina, HI")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -5.1082, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 1.1495, Truncation lag parameter = 4, p-value = 0.01
par(mfrow=c(2,2)) 
acf(kahului, lag=50, main="ACF of Kahului-Wailuku-Lahaina, HI")
pacf(kahului, lag=50, main="PACF of Kahului-Wailuku-Lahaina, HI")
acf(kahului, lag.max = 12, main="SACF of Kahului-Wailuku-Lahaina, HI")
pacf(kahului, lag.max = 12, main="SPACF of Kahului-Wailuku-Lahaina, HI")

# Differencing - stationary
kahului_stat <- diff(kahului)
adf_kpss_test(kahului_stat,"Kahului-Wailuku-Lahaina, HI")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -7.3802, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.068621, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(kahului_stat, lag=50, main="ACF of Kahului-Wailuku-Lahaina, HI")
pacf(kahului_stat, lag=50, main="PACF of Kahului-Wailuku-Lahaina, HI")
acf(kahului_stat, lag.max = 12, main="SACF of Kahului-Wailuku-Lahaina, HI")
pacf(kahului_stat, lag.max = 12, main="SPACF of Kahului-Wailuku-Lahaina, HI")

# Split the data
splited <- train_test_split(kahului_stat)
## Warning in window.default(x, ...): 'start' value not changed
kahului_train <- splited$train
kahului_test <- splited$test
kahului_train
##               Jan          Feb          Mar          Apr          May
## 2013              -0.778974654  1.060963902  1.665663082 -1.941738351
## 2014  0.296236559 -0.171428571  0.217665131  0.159318996  0.389068100
## 2015  2.892204301 -0.197273425 -0.897887865  0.214435484  0.119973118
## 2016  2.237903226 -0.323210975  0.182888395 -1.844784946 -1.204946237
## 2017  1.348387097 -1.133054916  0.508323733  0.041021505  0.443118280
## 2018 -0.210215054  1.116186636 -0.297907066 -0.344910394 -0.207777778
## 2019  0.995430108 -1.845842934  1.047187020  0.970412186 -1.519336918
## 2020  0.345161290 -1.843874808  1.062692012 -0.163763441 -0.240000000
## 2021 -0.227956989  0.212346390  0.287653610 -0.772455197  0.197186380
##               Jun          Jul          Aug          Sep          Oct
## 2013 -0.719928315 -1.503189964  0.103225806 -1.270591398  1.316827957
## 2014 -2.365040323 -1.251626344  0.612587883 -1.585128205  0.596241039
## 2015 -1.311639785  0.276693548  0.837096774 -0.981845878 -0.335358423
## 2016 -0.705331541  0.516353047  0.657526882 -0.076379928  0.820197133
## 2017 -0.199229391 -0.212598566 -0.606451613  0.206827957  0.717903226
## 2018 -0.268888889  1.050609319 -1.802150538  0.154318996 -1.244641577
## 2019  0.546559140  3.594838710 -3.090322581 -0.326182796 -0.168440860
## 2020  0.121111111  0.483727599 -0.445698925  0.121971326  0.185017921
## 2021 -0.613297491  0.379426523 -0.375268817 -0.105268817 -0.414220430
##               Nov          Dec
## 2013  1.273172043 -0.681236559
## 2014  1.500008961 -0.473978495
## 2015 -0.674363799  0.005815412
## 2016  0.468413978 -0.688844086
## 2017 -0.525681004  0.124068100
## 2018  1.415752688 -0.472741935
## 2019 -0.442670251  0.912562724
## 2020 -1.261267921  1.566106631
## 2021  0.607553763  0.043655914
kahului_test
##               Jan          Feb          Mar          Apr          May
## 2022  0.066666667 -0.239429724  0.697494240  1.911612903  0.049677419
##               Jun          Jul          Aug          Sep          Oct
## 2022 -1.886344086  0.557311828 -0.280645161 -0.543333333  0.367526882
##               Nov          Dec
## 2022  0.912473118  0.008494624
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(kahului_train,kahului_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = -0.0388 
## 
##   sigma:  1.0422
## 
##      AIC     AICc      BIC 
## 512.8187 513.0517 520.8371 
## 
## Error measures:
##                        ME     RMSE       MAE      MPE     MAPE      MASE
## Training set 0.0001515376 1.032411 0.7723166 108.3616 108.3616 0.7968642
##                  ACF1
## Training set -0.27412
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003920
## Feb 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003920
## Mar 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003920
## Apr 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003921
## May 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003921
## Jun 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003921
## Jul 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003921
## Aug 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003921
## Sep 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003921
## Oct 2022    -0.03874876 -1.374378 1.296881 -2.081418 2.003921
ses_final <- data.frame(Actuals = kahului_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##        Actuals    Forecast
## 1   0.06666667 -0.03874876
## 2  -0.23942972 -0.03874876
## 3   0.69749424 -0.03874876
## 4   1.91161290 -0.03874876
## 5   0.04967742 -0.03874876
## 6  -1.88634409 -0.03874876
## 7   0.55731183 -0.03874876
## 8  -0.28064516 -0.03874876
## 9  -0.54333333 -0.03874876
## 10  0.36752688 -0.03874876
# Double exponential smoothing
fc_db <- db_smooth_fitting(kahului_train,kahului_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 2e-04 
##     beta  = 2e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = -0.1161 
##     b = 0.0012 
##     s = 0.937 0.1324 0.2573 0.2328 -0.4874 -0.4749
##            0.6664 -0.5648 -0.2429 -0.1506 0.274 -0.5794
## 
##   sigma:  1.0153
## 
##      AIC     AICc      BIC 
## 519.9109 526.7873 565.3490 
## 
## Error measures:
##                      ME      RMSE       MAE      MPE     MAPE      MASE
## Training set 0.01126132 0.9363155 0.7304579 78.36066 151.5814 0.7536751
##                   ACF1
## Training set -0.278708
## 
## Forecasts:
##          Point Forecast      Lo 80     Hi 80      Lo 95    Hi 95
## Jan 2022     0.97594698 -0.3252094 2.2771034 -1.0140000 2.965894
## Feb 2022    -0.53901518 -1.8401716 0.7621413 -2.5289623 1.450932
## Mar 2022     0.31573497 -0.9854217 1.6168916 -1.6742124 2.305682
## Apr 2022    -0.10736483 -1.4085218 1.1937921 -2.0973126 1.882583
## May 2022    -0.19855042 -1.4997078 1.1026070 -2.1884989 1.791398
## Jun 2022    -0.51895078 -1.8201088 0.7822073 -2.5089003 1.470999
## Jul 2022     0.71335281 -0.5878061 2.0145118 -1.2765981 2.703304
## Aug 2022    -0.42621248 -1.7273726 0.8749476 -2.4161651 1.563740
## Sep 2022    -0.43735273 -1.7385143 0.8638089 -2.4273077 1.552602
## Oct 2022     0.28416315 -1.0170003 1.5853266 -1.7057946 2.274121
## Nov 2022     0.31012293 -0.9910427 1.6112886 -1.6798382 2.300084
## Dec 2022     0.18652420 -1.1146441 1.4876925 -1.8034409 2.176489
## Jan 2023     0.99258385 -0.3085878 2.2937555 -0.9973865 2.982554
## Feb 2023    -0.52237831 -1.8235535 0.7787969 -2.5123541 1.467597
## Mar 2023     0.33237184 -0.9688075 1.6335512 -1.6576102 2.322354
## Apr 2023    -0.09072796 -1.3919120 1.2104561 -2.0807172 1.899261
## May 2023    -0.18191354 -1.4831029 1.1192758 -2.1719109 1.808084
## Jun 2023    -0.50231391 -1.8035091 0.7988813 -2.4923203 1.487692
## Jul 2023     0.72998968 -0.5712121 2.0311915 -1.2600268 2.720006
## Aug 2023    -0.40957561 -1.7107847 0.8916335 -2.3996033 1.580452
## Sep 2023    -0.42071586 -1.7219331 0.8805013 -2.4107558 1.569324
## Oct 2023     0.30080002 -1.0004260 1.6020261 -1.6892535 2.290854
## Nov 2023     0.32675980 -0.9744759 1.6279955 -1.6633085 2.316828
## Dec 2023     0.20316107 -1.0980852 1.5044073 -1.7869233 2.193245
db_final <- data.frame(Actuals = kahului_test, Forecast = fc_db$Forecast$mean)
db_final
##         Actuals   Forecast
## 1   0.066666667  0.9759470
## 2  -0.239429724 -0.5390152
## 3   0.697494240  0.3157350
## 4   1.911612903 -0.1073648
## 5   0.049677419 -0.1985504
## 6  -1.886344086 -0.5189508
## 7   0.557311828  0.7133528
## 8  -0.280645161 -0.4262125
## 9  -0.543333333 -0.4373527
## 10  0.367526882  0.2841631
## 11  0.912473118  0.3101229
## 12  0.008494624  0.1865242
# 
# # ETS
fc_ets <- ets_fitting(kahului_train,kahului_test)

summary(fc_ets$Model)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = -0.0389 
## 
##   sigma:  1.0422
## 
##      AIC     AICc      BIC 
## 512.8187 513.0517 520.8371 
## 
## Training set error measures:
##                        ME     RMSE       MAE      MPE     MAPE      MASE
## Training set 0.0002727829 1.032411 0.7723178 108.3877 108.3877 0.7968654
##                  ACF1
## Training set -0.27412
ets_final <- data.frame(Actuals = kahului_test, Forecast = fc_ets$Forecast$mean)
ets_final
##         Actuals    Forecast
## 1   0.066666667 -0.03886934
## 2  -0.239429724 -0.03886934
## 3   0.697494240 -0.03886934
## 4   1.911612903 -0.03886934
## 5   0.049677419 -0.03886934
## 6  -1.886344086 -0.03886934
## 7   0.557311828 -0.03886934
## 8  -0.280645161 -0.03886934
## 9  -0.543333333 -0.03886934
## 10  0.367526882 -0.03886934
## 11  0.912473118 -0.03886934
## 12  0.008494624 -0.03886934
# ARIMA fitting
fc_arima <- arima_fit(kahului_train,kahului_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(1,0,1)(1,0,0)[12] with zero mean 
## 
## Coefficients:
##          ar1      ma1    sar1
##       0.5534  -0.9251  0.3380
## s.e.  0.1042   0.0496  0.0949
## 
## sigma^2 = 0.8101:  log likelihood = -140.06
## AIC=288.11   AICc=288.51   BIC=298.81
## 
## Training set error measures:
##                      ME     RMSE       MAE      MPE    MAPE      MASE
## Training set -0.1431344 0.887346 0.6866224 25.36606 179.633 0.7084462
##                     ACF1
## Training set -0.07168266
arima_final <- data.frame(Actuals = kahului_test, Forecast = fc_arima$Forecast$mean)
arima_final
##         Actuals    Forecast
## 1   0.066666667  0.06152002
## 2  -0.239429724  0.14845193
## 3   0.697494240  0.13965855
## 4   1.911612903 -0.23757336
## 5   0.049677419  0.07964208
## 6  -1.886344086 -0.20007810
## 7   0.557311828  0.13221473
## 8  -0.280645161 -0.12462383
## 9  -0.543333333 -0.03435746
## 10  0.367526882 -0.13931687
## 11  0.912473118  0.20570534
## 12  0.008494624  0.01496092
forecasts <- Arima(kahului_stat, order=c(1,0,1), seasonal = list(order = c(1,0,0), period = 12)) 
forecast_2023(forecasts,kahului_stat,"Kahului-Wailuku-Lahaina, HI")
##               Jan          Feb          Mar          Apr          May
## 2022  0.066666667 -0.239429724  0.697494240  1.911612903  0.049677419
## 2023 -0.616208274 -0.473254191 -0.017676175  0.470900686 -0.083917253
##               Jun          Jul          Aug          Sep          Oct
## 2022 -1.886344086  0.557311828 -0.280645161 -0.543333333  0.367526882
## 2023 -0.684577932  0.136893858 -0.125116149 -0.203508484  0.099695141
##               Nov          Dec
## 2022  0.912473118  0.008494624
## 2023  0.281152490 -0.013345823

10. Bangor, ME

# 10. Bangor, ME
bangor <- data[data$CBSA_NAME == 'Bangor, ME',c("year","PM25_Level_FINAL")]
bangor <- ts(bangor$PM25_Level_FINAL, start = c(2013,01), end = c(2022,12), frequency = 12)

# Result is not stationary
adf_kpss_test(bangor,"Bangor, ME")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -5.4675, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 1.1557, Truncation lag parameter = 4, p-value = 0.01
par(mfrow=c(2,2)) 
acf(bangor, lag=50, main="ACF of Bangor, ME")
pacf(bangor, lag=50, main="PACF of Bangor, ME")
acf(bangor, lag.max = 12, main="SACF of Bangor, ME")
pacf(bangor, lag.max = 12, main="SPACF of Bangor, ME")

# Differencing - stationary
bangor_stat <- diff(bangor)
adf_kpss_test(bangor_stat,"Bangor, ME")
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -8.4554, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(ts_data): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 0.025936, Truncation lag parameter = 4, p-value = 0.1
par(mfrow=c(2,2)) 
acf(bangor_stat, lag=50, main="ACF of Bangor, ME")
pacf(bangor_stat, lag=50, main="PACF of Bangor, ME")
acf(bangor_stat, lag.max = 12, main="SACF of Bangor, ME")
pacf(bangor_stat, lag.max = 12, main="SPACF of Bangor, ME")

# Split the data
splited <- train_test_split(bangor_stat)
## Warning in window.default(x, ...): 'start' value not changed
bangor_train <- splited$train
bangor_test <- splited$test
bangor_train
##               Jan          Feb          Mar          Apr          May
## 2013              -1.039439324 -2.247657450  0.835197133 -1.324444444
## 2014  2.175160256  4.255769231 -2.331451613 -2.701881720 -1.788440860
## 2015  3.869892473  0.307469278 -0.716071429 -3.826666667 -0.362043011
## 2016  2.132795699 -1.539710790 -1.378299963 -0.591182796 -1.511908602
## 2017 -4.839784946 -0.266873080 -1.072374232 -1.558548387 -1.119677419
## 2018  1.011720430 -0.283041475 -3.918786482  0.471659498  0.652050179
## 2019 -1.002634409 -0.051048387 -0.275188172 -2.358256272 -0.184969534
## 2020  0.569892473 -0.051223582 -1.810604375 -0.494457885 -0.086993728
## 2021 -0.074596774  0.156221198 -1.115898618 -1.300053763  1.540645161
##               Jun          Jul          Aug          Sep          Oct
## 2013  2.347777778  1.728566308 -1.816666667 -2.185788530  1.153530466
## 2014  0.886774194  2.556774194 -1.840322581 -0.794784946 -0.453602151
## 2015 -0.889623656  4.905215054 -2.964247312  2.515143369 -1.608691756
## 2016  0.697741935 -0.806881720 -0.609784946 -0.022672414  2.096032629
## 2017  2.350510753  1.107822581  0.051612903  0.102231183 -2.254274194
## 2018  0.445394265  1.802240143  0.343602151 -2.354175627 -0.110985663
## 2019  1.386608423  3.519950717 -1.804301075 -0.614121864 -0.796630824
## 2020  1.601021505  0.021290323  0.248387097 -1.506344086  1.343440860
## 2021  1.681021505  0.002580645  0.078225806 -3.207473118  0.671182796
##               Nov          Dec
## 2013  1.994802867 -3.428151709
## 2014  4.976935484 -4.146290323
## 2015  2.665636201 -0.434991039
## 2016  1.943584229  2.563136201
## 2017  1.938718638  0.561281362
## 2018  1.110763441  0.652516129
## 2019  0.141630824  1.005143369
## 2020  1.284892473 -1.249005376
## 2021  1.072150538 -0.285053763
bangor_test
##              Jan         Feb         Mar         Apr         May         Jun
## 2022  3.48548387 -2.51745392 -0.12448157 -2.17849462  0.68194444  0.72805556
##              Jul         Aug         Sep         Oct         Nov         Dec
## 2022  2.94430108 -1.66451613 -1.24978495  0.58204301  0.01795699  0.16053763
# Reset plot size
dev.off()
## null device 
##           1
# Simple exponential smoothing
fc_ses <- exp_smooth_fitting(bangor_train,bangor_test)

summary(fc_ses$Model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = -0.0363 
## 
##   sigma:  1.8889
## 
##      AIC     AICc      BIC 
## 640.0816 640.3146 648.1001 
## 
## Error measures:
##                        ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 0.0001437482 1.871207 1.446691 112.0699 113.2083 0.8758918
##                    ACF1
## Training set -0.2049843
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95   Hi 95
## Jan 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## Feb 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## Mar 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## Apr 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## May 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## Jun 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## Jul 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## Aug 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## Sep 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
## Oct 2022    -0.03630272 -2.457082 2.384477 -3.738566 3.66596
ses_final <- data.frame(Actuals = bangor_test[1:10], Forecast = fc_ses$Forecast$mean)
ses_final
##       Actuals    Forecast
## 1   3.4854839 -0.03630272
## 2  -2.5174539 -0.03630272
## 3  -0.1244816 -0.03630272
## 4  -2.1784946 -0.03630272
## 5   0.6819444 -0.03630272
## 6   0.7280556 -0.03630272
## 7   2.9443011 -0.03630272
## 8  -1.6645161 -0.03630272
## 9  -1.2497849 -0.03630272
## 10  0.5820430 -0.03630272
# Double exponential smoothing
fc_db <- db_smooth_fitting(bangor_train,bangor_test)

summary(fc_db$Model)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
##     gamma = 2e-04 
## 
##   Initial states:
##     l = -0.1307 
##     b = 0.001 
##     s = 0.4575 -0.5891 2.0123 -0.0665 -0.5932 -1.0341
##            1.7742 1.0703 -0.1986 -1.5118 -1.4964 0.1754
## 
##   sigma:  1.6501
## 
##      AIC     AICc      BIC 
## 623.8353 630.7117 669.2734 
## 
## Error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.02872839 1.521696 1.147373 -657.7763 899.1385 0.6946716
##                    ACF1
## Training set -0.3893301
## 
## Forecasts:
##          Point Forecast       Lo 80     Hi 80     Lo 95    Hi 95
## Jan 2022     0.47453108 -1.64010261 2.5891648 -2.759522 3.708584
## Feb 2022     0.19365394 -1.92097980 2.3082877 -3.040399 3.427707
## Mar 2022    -1.47694459 -3.59157844 0.6376893 -4.710998 1.757109
## Apr 2022    -1.49049116 -3.60512523 0.6241429 -4.724545 1.743563
## May 2022    -0.17666883 -2.29130322 1.9379656 -3.410723 3.057385
## Jun 2022     1.09404665 -1.02058821 3.2086815 -2.140008 4.328102
## Jul 2022     1.79896599 -0.31566950 3.9136015 -1.435090 5.033022
## Aug 2022    -1.00763834 -3.12227466 1.1069980 -4.241696 2.226419
## Sep 2022    -0.56600097 -2.68063834 1.5486364 -3.800060 2.668058
## Oct 2022    -0.03736222 -2.15200089 2.0772764 -3.271423 3.196699
## Nov 2022     2.04246475 -0.07217549 4.1571050 -1.191598 5.276528
## Dec 2022    -0.55737640 -2.67201851 1.5572657 -3.791442 2.676690
## Jan 2023     0.49057032 -1.62407452 2.6052152 -2.743500 3.724641
## Feb 2023     0.20969317 -1.90495421 2.3243406 -3.024381 3.443767
## Mar 2023    -1.46090535 -3.57555565 0.6537449 -4.694984 1.773173
## Apr 2023    -1.47445193 -3.58910554 0.6402017 -4.708536 1.759632
## May 2023    -0.16062960 -2.27528696 1.9540278 -3.394719 3.073460
## Jun 2023     1.11008588 -1.00457568 3.2247474 -2.124010 4.344182
## Jul 2023     1.81500523 -0.29966101 3.9296715 -1.419098 5.049108
## Aug 2023    -0.99159911 -3.10627053 1.1230723 -4.225710 2.242512
## Sep 2023    -0.54996173 -2.66463887 1.5647154 -3.784081 2.684158
## Oct 2023    -0.02132299 -2.13600640 2.0933604 -3.255452 3.212806
## Nov 2023     2.05850399 -0.05618627 4.1731942 -1.175636 5.292644
## Dec 2023    -0.54133717 -2.65603489 1.5733606 -3.775488 2.692814
db_final <- data.frame(Actuals = bangor_test, Forecast = fc_db$Forecast$mean)
db_final
##        Actuals    Forecast
## 1   3.48548387  0.47453108
## 2  -2.51745392  0.19365394
## 3  -0.12448157 -1.47694459
## 4  -2.17849462 -1.49049116
## 5   0.68194444 -0.17666883
## 6   0.72805556  1.09404665
## 7   2.94430108  1.79896599
## 8  -1.66451613 -1.00763834
## 9  -1.24978495 -0.56600097
## 10  0.58204301 -0.03736222
## 11  0.01795699  2.04246475
## 12  0.16053763 -0.55737640
# 
# ETS
fc_ets <- ets_fitting(bangor_train,bangor_test)

summary(fc_ets$Model)
## ETS(A,N,A) 
## 
## Call:
##  ets(y = train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = -7e-04 
##     s = 0.5431 -0.5547 1.9626 0.0139 -0.6579 -1.0997
##            1.8695 1.0059 -0.3512 -1.375 -1.5631 0.2066
## 
##   sigma:  1.6281
## 
##      AIC     AICc      BIC 
## 619.2898 624.5645 659.3822 
## 
## Training set error measures:
##                       ME     RMSE     MAE       MPE     MAPE      MASE
## Training set -0.03030034 1.517821 1.15001 -689.0743 940.8811 0.6962677
##                    ACF1
## Training set -0.3910071
ets_final <- data.frame(Actuals = bangor_test, Forecast = fc_ets$Forecast$mean)
ets_final
##        Actuals    Forecast
## 1   3.48548387  0.54202732
## 2  -2.51745392  0.20549576
## 3  -0.12448157 -1.56418489
## 4  -2.17849462 -1.37597754
## 5   0.68194444 -0.35235212
## 6   0.72805556  1.00504290
## 7   2.94430108  1.86826451
## 8  -1.66451613 -1.10062651
## 9  -1.24978495 -0.65911265
## 10  0.58204301  0.01281342
## 11  0.01795699  1.96152251
## 12  0.16053763 -0.55576090
# ARIMA fitting
fc_arima <- arima_fit(bangor_train,bangor_test)

summary(fc_arima$Model)
## Series: train 
## ARIMA(1,0,1)(1,0,1)[12] with zero mean 
## 
## Coefficients:
##          ar1      ma1    sar1     sma1
##       0.2942  -0.9492  0.8277  -0.5174
## s.e.  0.1045   0.0353  0.1651   0.2751
## 
## sigma^2 = 2.052:  log likelihood = -190.96
## AIC=391.92   AICc=392.51   BIC=405.28
## 
## Training set error measures:
##                      ME     RMSE      MAE      MPE     MAPE      MASE
## Training set -0.1577987 1.405533 1.094809 179.9616 334.8423 0.6628465
##                      ACF1
## Training set -0.003201261
arima_final <- data.frame(Actuals = bangor_test, Forecast = fc_arima$Forecast$mean)
arima_final
##        Actuals   Forecast
## 1   3.48548387  0.4305973
## 2  -2.51745392  0.1464600
## 3  -0.12448157 -0.8459277
## 4  -2.17849462 -0.7190792
## 5   0.68194444  0.4269065
## 6   0.72805556  0.9768640
## 7   2.94430108  0.4316424
## 8  -1.66451613 -0.1046218
## 9  -1.24978495 -1.3792689
## 10  0.58204301  0.3181791
## 11  0.01795699  0.7010690
## 12  0.16053763 -0.1584427
forecasts <- Arima(bangor_stat, order=c(1,0,1), seasonal = list(order = c(1,0,1), period = 12)) 
forecast_2023(forecasts,bangor_stat,"Bangor, ME")
##              Jan         Feb         Mar         Apr         May         Jun
## 2022  3.48548387 -2.51745392 -0.12448157 -2.17849462  0.68194444  0.72805556
## 2023  0.51469634 -0.58445547 -0.90116048 -1.11497581  0.19855051  0.88963801
##              Jul         Aug         Sep         Oct         Nov         Dec
## 2022  2.94430108 -1.66451613 -1.24978495  0.58204301  0.01795699  0.16053763
## 2023  1.24727810 -0.60924767 -1.09221126  0.22990550  0.76434151 -0.08533189